X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2Fsm%2FStorage.c;h=02344003b89b1849bd9a63c7d774343b691c0110;hp=c02adf9dcc3cbee9c355b3dd9872e8be30893923;hb=5d52d9b64c21dcf77849866584744722f8121389;hpb=c09b0ab58d435213b7c9a034728a88cf36b62e81 diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index c02adf9..0234400 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team, 1998-2006 + * (c) The GHC Team, 1998-2008 * * Storage manager front end * @@ -13,26 +13,26 @@ #include "PosixSource.h" #include "Rts.h" + +#include "Storage.h" #include "RtsUtils.h" -#include "RtsFlags.h" #include "Stats.h" -#include "Hooks.h" #include "BlockAlloc.h" -#include "MBlock.h" #include "Weak.h" #include "Sanity.h" #include "Arena.h" -#include "OSThreads.h" #include "Capability.h" -#include "Storage.h" #include "Schedule.h" #include "RetainerProfile.h" // for counting memory blocks (memInventory) #include "OSMem.h" #include "Trace.h" +#include "GC.h" +#include "Evac.h" -#include #include +#include "ffi.h" + /* * All these globals require sm_mutex to access in THREADED_RTS mode. */ @@ -40,23 +40,16 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *small_alloc_list; /* allocate()d small objects */ -bdescr *pinned_object_block; /* allocate pinned objects into this block */ -nat alloc_blocks; /* number of allocate()d blocks since GC */ -nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ +nat alloc_blocks_lim; /* GC if n_large_blocks in any nursery + * reaches this. */ -StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */ -StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */ +bdescr *exec_block; generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ -step *g0s0 = NULL; /* generation 0, step 0, for convenience */ - -ullong total_allocated = 0; /* total memory allocated during run */ -nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */ -step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */ +nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */ #ifdef THREADED_RTS /* @@ -64,61 +57,57 @@ step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS * * simultaneous access by two STG threads. 
*/ Mutex sm_mutex; -/* - * This mutex is used by atomicModifyMutVar# only - */ -Mutex atomic_modify_mutvar_mutex; #endif - -/* - * Forward references - */ -static void *stgAllocForGMP (size_t size_in_bytes); -static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size); -static void stgDeallocForGMP (void *ptr, size_t size); +static void allocNurseries ( void ); static void -initStep (step *stp, int g, int s) +initGeneration (generation *gen, int g) { - stp->no = s; - stp->blocks = NULL; - stp->n_blocks = 0; - stp->old_blocks = NULL; - stp->n_old_blocks = 0; - stp->gen = &generations[g]; - stp->gen_no = g; - stp->hp = NULL; - stp->hpLim = NULL; - stp->hp_bd = NULL; - stp->scavd_hp = NULL; - stp->scavd_hpLim = NULL; - stp->scan = NULL; - stp->scan_bd = NULL; - stp->large_objects = NULL; - stp->n_large_blocks = 0; - stp->new_large_objects = NULL; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - stp->is_compacted = 0; - stp->bitmap = NULL; + gen->no = g; + gen->collections = 0; + gen->par_collections = 0; + gen->failed_promotions = 0; + gen->max_blocks = 0; + gen->blocks = NULL; + gen->n_blocks = 0; + gen->n_words = 0; + gen->live_estimate = 0; + gen->old_blocks = NULL; + gen->n_old_blocks = 0; + gen->large_objects = NULL; + gen->n_large_blocks = 0; + gen->n_new_large_blocks = 0; + gen->mut_list = allocBlock(); + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; + gen->mark = 0; + gen->compact = 0; + gen->bitmap = NULL; +#ifdef THREADED_RTS + initSpinLock(&gen->sync_large_objects); +#endif + gen->threads = END_TSO_QUEUE; + gen->old_threads = END_TSO_QUEUE; } void initStorage( void ) { - nat g, s; - generation *gen; + nat g, n; if (generations != NULL) { // multi-init protection return; } + initMBlocks(); + /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be * doing something reasonable. */ - ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info)); + /* We use the NOT_NULL variant or gcc warns that the test is always true */ + ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure)); ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure)); @@ -139,7 +128,6 @@ initStorage( void ) #if defined(THREADED_RTS) initMutex(&sm_mutex); - initMutex(&atomic_modify_mutvar_mutex); #endif ACQUIRE_SM_LOCK; @@ -151,105 +139,41 @@ initStorage( void ) /* Initialise all generations */ for(g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; - gen->no = g; - gen->mut_list = allocBlock(); - gen->collections = 0; - gen->failed_promotions = 0; - gen->max_blocks = 0; + initGeneration(&generations[g], g); } /* A couple of convenience pointers */ g0 = &generations[0]; oldest_gen = &generations[RtsFlags.GcFlags.generations-1]; - /* Allocate step structures in each generation */ - if (RtsFlags.GcFlags.generations > 1) { - /* Only for multiple-generations */ - - /* Oldest generation: one step */ - oldest_gen->n_steps = 1; - oldest_gen->steps = - stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step"); - - /* set up all except the oldest generation with 2 steps */ - for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - generations[g].n_steps = RtsFlags.GcFlags.steps; - generations[g].steps = - stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_), - "initStorage: steps"); - } - - } else { - /* single generation, i.e. 
a two-space collector */ - g0->n_steps = 1; - g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps"); - } - -#ifdef THREADED_RTS - n_nurseries = n_capabilities; - nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), - "initStorage: nurseries"); -#else - n_nurseries = 1; - nurseries = g0->steps; // just share nurseries[0] with g0s0 -#endif - - /* Initialise all steps */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - initStep(&generations[g].steps[s], g, s); - } - } - -#ifdef THREADED_RTS - for (s = 0; s < n_nurseries; s++) { - initStep(&nurseries[s], 0, s); - } -#endif + nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_), + "initStorage: nurseries"); /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - for (s = 0; s < generations[g].n_steps-1; s++) { - generations[g].steps[s].to = &generations[g].steps[s+1]; - } - generations[g].steps[s].to = &generations[g+1].steps[0]; + generations[g].to = &generations[g+1]; } - oldest_gen->steps[0].to = &oldest_gen->steps[0]; - -#ifdef THREADED_RTS - for (s = 0; s < n_nurseries; s++) { - nurseries[s].to = generations[0].steps[0].to; - } -#endif + oldest_gen->to = oldest_gen; /* The oldest generation has one step. */ - if (RtsFlags.GcFlags.compact) { + if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) { if (RtsFlags.GcFlags.generations == 1) { - errorBelch("WARNING: compaction is incompatible with -G1; disabled"); + errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled"); } else { - oldest_gen->steps[0].is_compacted = 1; + oldest_gen->mark = 1; + if (RtsFlags.GcFlags.compact) + oldest_gen->compact = 1; } } -#ifdef THREADED_RTS - if (RtsFlags.GcFlags.generations == 1) { - errorBelch("-G1 is incompatible with -threaded"); - stg_exit(EXIT_FAILURE); - } -#endif - - /* generation 0 is special: that's the nursery */ generations[0].max_blocks = 0; - /* G0S0: the allocation area. Policy: keep the allocation area + /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap * size. Reason: we're going to do a major collection first, and we * don't want it to be a big one. This vague idea is borne out by * rigorous experimental evidence. 
*/ - g0s0 = &generations[0].steps[0]; - allocNurseries(); weak_ptr_list = NULL; @@ -257,12 +181,25 @@ initStorage( void ) revertible_caf_list = NULL; /* initialise the allocate() interface */ - small_alloc_list = NULL; - alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; - /* Tell GNU multi-precision pkg about our custom alloc functions */ - mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP); + exec_block = NULL; + +#ifdef THREADED_RTS + initSpinLock(&gc_alloc_block_sync); + whitehole_spin = 0; +#endif + + N = 0; + + // allocate a block for each mut list + for (n = 0; n < n_capabilities; n++) { + for (g = 1; g < RtsFlags.GcFlags.generations; g++) { + capabilities[n].mut_lists[g] = allocBlock(); + } + } + + initGcThreads(); IF_DEBUG(gc, statDescribeGens()); @@ -278,16 +215,13 @@ exitStorage (void) void freeStorage (void) { - nat g; - - for(g = 0; g < RtsFlags.GcFlags.generations; g++) - stgFree(generations[g].steps); stgFree(generations); freeAllMBlocks(); #if defined(THREADED_RTS) closeMutex(&sm_mutex); - closeMutex(&atomic_modify_mutvar_mutex); #endif + stgFree(nurseries); + freeGcThreads(); } /* ----------------------------------------------------------------------------- @@ -295,20 +229,20 @@ freeStorage (void) The entry code for every CAF does the following: - - builds a CAF_BLACKHOLE in the heap - - pushes an update frame pointing to the CAF_BLACKHOLE + - builds a BLACKHOLE in the heap + - pushes an update frame pointing to the BLACKHOLE - invokes UPD_CAF(), which: - calls newCaf, below - - updates the CAF with a static indirection to the CAF_BLACKHOLE + - updates the CAF with a static indirection to the BLACKHOLE - Why do we build a BLACKHOLE in the heap rather than just updating + Why do we build an BLACKHOLE in the heap rather than just updating the thunk directly? It's so that we only need one kind of update frame - otherwise we'd need a static version of the update frame too. newCaf() does the following: - - it puts the CAF on the oldest generation's mut-once list. - This is so that we can treat the CAF as a root when collecting + - it puts the CAF on the oldest generation's mutable list. + This is so that we treat the CAF as a root when collecting younger generations. For GHCI, we have additional requirements when dealing with CAFs: @@ -332,10 +266,8 @@ freeStorage (void) -------------------------------------------------------------------------- */ void -newCAF(StgClosure* caf) +newCAF(StgRegTable *reg, StgClosure* caf) { - ACQUIRE_SM_LOCK; - if(keepCAFs) { // HACK: @@ -349,23 +281,25 @@ newCAF(StgClosure* caf) // do another hack here and do an address range test on caf to figure // out whether it is from a dynamic library. ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info; + + ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex ((StgIndStatic *)caf)->static_link = caf_list; caf_list = caf; + RELEASE_SM_LOCK; } else { - /* Put this CAF on the mutable list for the old generation. - * This is a HACK - the IND_STATIC closure doesn't really have - * a mut_link field, but we pretend it has - in fact we re-use - * the STATIC_LINK field for the time being, because when we - * come to do a major GC we won't need the mut_link field - * any more and can use it as a STATIC_LINK. - */ + // Put this CAF on the mutable list for the old generation. 
((StgIndStatic *)caf)->saved_info = NULL; - recordMutableGen(caf, oldest_gen); + recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no); } - - RELEASE_SM_LOCK; +} + +// External API for setting the keepCAFs flag. see #3900. +void +setKeepCAFs (void) +{ + keepCAFs = 1; } // An alternate version of newCaf which is used for dynamically loaded @@ -378,7 +312,7 @@ newCAF(StgClosure* caf) // The linker hackily arranges that references to newCaf from dynamic // code end up pointing to newDynCAF. void -newDynCAF(StgClosure *caf) +newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf) { ACQUIRE_SM_LOCK; @@ -394,7 +328,7 @@ newDynCAF(StgClosure *caf) -------------------------------------------------------------------------- */ static bdescr * -allocNursery (step *stp, bdescr *tail, nat blocks) +allocNursery (bdescr *tail, nat blocks) { bdescr *bd; nat i; @@ -415,8 +349,7 @@ allocNursery (step *stp, bdescr *tail, nat blocks) if (tail != NULL) { tail->u.back = bd; } - bd->step = stp; - bd->gen_no = 0; + initBdescr(bd, g0, g0); bd->flags = 0; bd->free = bd->start; tail = bd; @@ -428,33 +361,25 @@ allocNursery (step *stp, bdescr *tail, nat blocks) static void assignNurseriesToCapabilities (void) { -#ifdef THREADED_RTS nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { capabilities[i].r.rNursery = &nurseries[i]; capabilities[i].r.rCurrentNursery = nurseries[i].blocks; capabilities[i].r.rCurrentAlloc = NULL; } -#else /* THREADED_RTS */ - MainCapability.r.rNursery = &nurseries[0]; - MainCapability.r.rCurrentNursery = nurseries[0].blocks; - MainCapability.r.rCurrentAlloc = NULL; -#endif } -void +static void allocNurseries( void ) { nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { nurseries[i].blocks = - allocNursery(&nurseries[i], NULL, - RtsFlags.GcFlags.minAllocAreaSize); - nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize; - nurseries[i].old_blocks = NULL; - nurseries[i].n_old_blocks = 0; + allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize); + nurseries[i].n_blocks = + RtsFlags.GcFlags.minAllocAreaSize; } assignNurseriesToCapabilities(); } @@ -464,14 +389,12 @@ resetNurseries( void ) { nat i; bdescr *bd; - step *stp; - for (i = 0; i < n_nurseries; i++) { - stp = &nurseries[i]; - for (bd = stp->blocks; bd; bd = bd->link) { + for (i = 0; i < n_capabilities; i++) { + for (bd = nurseries[i].blocks; bd; bd = bd->link) { bd->free = bd->start; ASSERT(bd->gen_no == 0); - ASSERT(bd->step == stp); + ASSERT(bd->gen == g0); IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); } } @@ -484,25 +407,25 @@ countNurseryBlocks (void) nat i; lnat blocks = 0; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { blocks += nurseries[i].n_blocks; } return blocks; } static void -resizeNursery ( step *stp, nat blocks ) +resizeNursery ( nursery *nursery, nat blocks ) { bdescr *bd; nat nursery_blocks; - nursery_blocks = stp->n_blocks; + nursery_blocks = nursery->n_blocks; if (nursery_blocks == blocks) return; if (nursery_blocks < blocks) { debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", blocks); - stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); } else { bdescr *next_bd; @@ -510,7 +433,7 @@ resizeNursery ( step *stp, nat blocks ) debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks", blocks); - bd = stp->blocks; + bd = nursery->blocks; while (nursery_blocks > blocks) { next_bd = bd->link; 
next_bd->u.back = NULL; @@ -518,16 +441,16 @@ resizeNursery ( step *stp, nat blocks ) freeGroup(bd); bd = next_bd; } - stp->blocks = bd; + nursery->blocks = bd; // might have gone just under, by freeing a large block, so make // up the difference. if (nursery_blocks < blocks) { - stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); } } - stp->n_blocks = blocks; - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + nursery->n_blocks = blocks; + ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks); } // @@ -537,7 +460,7 @@ void resizeNurseriesFixed (nat blocks) { nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { resizeNursery(&nurseries[i], blocks); } } @@ -550,91 +473,66 @@ resizeNurseries (nat blocks) { // If there are multiple nurseries, then we just divide the number // of available blocks between them. - resizeNurseriesFixed(blocks / n_nurseries); + resizeNurseriesFixed(blocks / n_capabilities); } + /* ----------------------------------------------------------------------------- - The allocate() interface + move_TSO is called to update the TSO structure after it has been + moved from one place to another. + -------------------------------------------------------------------------- */ - allocate(n) always succeeds, and returns a chunk of memory n words - long. n can be larger than the size of a block if necessary, in - which case a contiguous block group will be allocated. +void +move_TSO (StgTSO *src, StgTSO *dest) +{ + ptrdiff_t diff; + + // relocate the stack pointer... + diff = (StgPtr)dest - (StgPtr)src; // In *words* + dest->sp = (StgPtr)dest->sp + diff; +} + +/* ----------------------------------------------------------------------------- + split N blocks off the front of the given bdescr, returning the + new block group. We add the remainder to the large_blocks list + in the same step as the original block. 
-------------------------------------------------------------------------- */ -StgPtr -allocate( nat n ) +bdescr * +splitLargeBlock (bdescr *bd, nat blocks) { - bdescr *bd; - StgPtr p; + bdescr *new_bd; ACQUIRE_SM_LOCK; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); + ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks); - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += req_blocks; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; - } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; - } - - p = alloc_Hp; - alloc_Hp += n; - RELEASE_SM_LOCK; - return p; -} + // subtract the original number of blocks from the counter first + bd->gen->n_large_blocks -= bd->blocks; -lnat -allocatedBytes( void ) -{ - lnat allocated; + new_bd = splitBlockGroup (bd, blocks); + initBdescr(new_bd, bd->gen, bd->gen->to); + new_bd->flags = BF_LARGE | (bd->flags & BF_EVACUATED); + // if new_bd is in an old generation, we have to set BF_EVACUATED + new_bd->free = bd->free; + dbl_link_onto(new_bd, &bd->gen->large_objects); - allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp); - if (pinned_object_block != NULL) { - allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - - pinned_object_block->free; - } - - return allocated; -} + ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W); -void -tidyAllocateLists (void) -{ - if (small_alloc_list != NULL) { - ASSERT(alloc_Hp >= small_alloc_list->start && - alloc_Hp <= small_alloc_list->start + BLOCK_SIZE); - small_alloc_list->free = alloc_Hp; - } + // add the new number of blocks to the counter. Due to the gaps + // for block descriptors, new_bd->blocks + bd->blocks might not be + // equal to the original bd->blocks, which is why we do it this way. + bd->gen->n_large_blocks += bd->blocks + new_bd->blocks; + + ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks); + + RELEASE_SM_LOCK; + + return new_bd; } /* ----------------------------------------------------------------------------- - allocateLocal() + allocate() This allocates memory in the current thread - it is intended for use primarily from STG-land where we have a Capability. It is @@ -647,67 +545,82 @@ tidyAllocateLists (void) -------------------------------------------------------------------------- */ StgPtr -allocateLocal (Capability *cap, nat n) +allocate (Capability *cap, lnat n) { bdescr *bd; StgPtr p; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - ACQUIRE_SM_LOCK; + lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + + // Attempting to allocate an object larger than maxHeapSize + // should definitely be disallowed. 
(bug #1791) + if (RtsFlags.GcFlags.maxHeapSize > 0 && + req_blocks >= RtsFlags.GcFlags.maxHeapSize) { + heapOverflow(); + // heapOverflow() doesn't exit (see #2592), but we aren't + // in a position to do a clean shutdown here: we + // either have to allocate the memory or exit now. + // Allocating the memory would be bad, because the user + // has requested that we not exceed maxHeapSize, so we + // just exit. + stg_exit(EXIT_HEAPOVERFLOW); + } + + ACQUIRE_SM_LOCK bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += req_blocks; - bd->gen_no = 0; - bd->step = g0s0; + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks += bd->blocks; // might be larger than req_blocks + g0->n_new_large_blocks += bd->blocks; + RELEASE_SM_LOCK; + initBdescr(bd, g0, g0); bd->flags = BF_LARGE; bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; return bd->start; - - /* small allocation (r.rCurrentAlloc; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - - // The CurrentAlloc block is full, we need to find another - // one. First, we try taking the next block from the - // nursery: - bd = cap->r.rCurrentNursery->link; - - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - // The nursery is empty, or the next block is already - // full: allocate a fresh block (we can't fail here). - ACQUIRE_SM_LOCK; - bd = allocBlock(); - cap->r.rNursery->n_blocks++; - RELEASE_SM_LOCK; - bd->gen_no = 0; - bd->step = cap->r.rNursery; - bd->flags = 0; - } else { - // we have a block in the nursery: take it and put - // it at the *front* of the nursery list, and use it - // to allocate() from. - cap->r.rCurrentNursery->link = bd->link; - if (bd->link != NULL) { - bd->link->u.back = cap->r.rCurrentNursery; - } - } - dbl_link_onto(bd, &cap->r.rNursery->blocks); - cap->r.rCurrentAlloc = bd; - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); - } + } + + /* small allocation (r.rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + cap->r.rNursery->n_blocks++; + RELEASE_SM_LOCK; + initBdescr(bd, g0, g0); + bd->flags = 0; + // If we had to allocate a new block, then we'll GC + // pretty quickly now, because MAYBE_GC() will + // notice that CurrentNursery->link is NULL. + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + cap->r.rCurrentNursery->link = bd->link; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + dbl_link_onto(bd, &cap->r.rNursery->blocks); + cap->r.rCurrentAlloc = bd; + IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); } p = bd->free; bd->free += n; + + IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa)); return p; } @@ -716,7 +629,7 @@ allocateLocal (Capability *cap, nat n) We allocate small pinned objects into a single block, allocating a new block when the current one overflows. The block is chained - onto the large_object_list of generation 0 step 0. + onto the large_object_list of generation 0. NOTE: The GC can't in general handle pinned objects. 
This interface is only safe to use for ByteArrays, which have no @@ -735,120 +648,100 @@ allocateLocal (Capability *cap, nat n) ------------------------------------------------------------------------- */ StgPtr -allocatePinned( nat n ) +allocatePinned (Capability *cap, lnat n) { StgPtr p; - bdescr *bd = pinned_object_block; + bdescr *bd; // If the request is for a large object, then allocate() // will give us a pinned object anyway. if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - return allocate(n); + p = allocate(cap, n); + Bdescr(p)->flags |= BF_PINNED; + return p; } - ACQUIRE_SM_LOCK; - TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); - // we always return 8-byte aligned memory. bd->free must be - // 8-byte aligned to begin with, so we just round up n to - // the nearest multiple of 8 bytes. - if (sizeof(StgWord) == 4) { - n = (n+1) & ~1; - } - + bd = cap->pinned_object_block; + // If we don't have a block of pinned objects yet, or the current // one isn't large enough to hold the new object, allocate a new one. if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { - pinned_object_block = bd = allocBlock(); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks++; - bd->gen_no = 0; - bd->step = g0s0; + ACQUIRE_SM_LOCK; + cap->pinned_object_block = bd = allocBlock(); + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks++; + g0->n_new_large_blocks++; + RELEASE_SM_LOCK; + initBdescr(bd, g0, g0); bd->flags = BF_PINNED | BF_LARGE; bd->free = bd->start; - alloc_blocks++; } p = bd->free; bd->free += n; - RELEASE_SM_LOCK; return p; } /* ----------------------------------------------------------------------------- + Write Barriers + -------------------------------------------------------------------------- */ + +/* This is the write barrier for MUT_VARs, a.k.a. IORefs. A MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY and is put on the mutable list. - -------------------------------------------------------------------------- */ - +*/ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) { Capability *cap = regTableToCapability(reg); - bdescr *bd; if (p->header.info == &stg_MUT_VAR_CLEAN_info) { p->header.info = &stg_MUT_VAR_DIRTY_info; - bd = Bdescr((StgPtr)p); - if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no); + recordClosureMutated(cap,p); } } -/* ----------------------------------------------------------------------------- - Allocation functions for GMP. - - These all use the allocate() interface - we can't have any garbage - collection going on during a gmp operation, so we use allocate() - which always succeeds. The gmp operations which might need to - allocate will ask the storage manager (via doYouWantToGC()) whether - a garbage collection is required, in case we get into a loop doing - only allocate() style allocation. - -------------------------------------------------------------------------- */ - -static void * -stgAllocForGMP (size_t size_in_bytes) +// Setting a TSO's link field with a write barrier. +// It is *not* necessary to call this function when +// * setting the link field to END_TSO_QUEUE +// * putting a TSO on the blackhole_queue +// * setting the link field of the currently running TSO, as it +// will already be dirty. 
+void +setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) { - StgArrWords* arr; - nat data_size_in_words, total_size_in_words; - - /* round up to a whole number of words */ - data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_); - total_size_in_words = sizeofW(StgArrWords) + data_size_in_words; - - /* allocate and fill it in. */ -#if defined(THREADED_RTS) - arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words); -#else - arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words); -#endif - SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words); - - /* and return a ptr to the goods inside the array */ - return arr->payload; + if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { + tso->flags |= TSO_LINK_DIRTY; + recordClosureMutated(cap,(StgClosure*)tso); + } + tso->_link = target; } -static void * -stgReallocForGMP (void *ptr, size_t old_size, size_t new_size) +void +dirty_TSO (Capability *cap, StgTSO *tso) { - void *new_stuff_ptr = stgAllocForGMP(new_size); - nat i = 0; - char *p = (char *) ptr; - char *q = (char *) new_stuff_ptr; - - for (; i < old_size; i++, p++, q++) { - *q = *p; + if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { + recordClosureMutated(cap,(StgClosure*)tso); } - - return(new_stuff_ptr); + tso->dirty = 1; } -static void -stgDeallocForGMP (void *ptr STG_UNUSED, - size_t size STG_UNUSED) +/* + This is the write barrier for MVARs. An MVAR_CLEAN objects is not + on the mutable list; a MVAR_DIRTY is. When written to, a + MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list. + The check for MVAR_CLEAN is inlined at the call site for speed, + this really does make a difference on concurrency-heavy benchmarks + such as Chaneneos and cheap-concurrency. +*/ +void +dirty_MVAR(StgRegTable *reg, StgClosure *p) { - /* easy for us: the garbage collector does the dealloc'n */ + recordClosureMutated(regTableToCapability(reg),p); } /* ----------------------------------------------------------------------------- @@ -860,8 +753,7 @@ stgDeallocForGMP (void *ptr STG_UNUSED, * * Approximate how much we've allocated: number of blocks in the * nursery + blocks allocated via allocate() - unused nusery blocks. - * This leaves a little slop at the end of each block, and doesn't - * take into account large objects (ToDo). + * This leaves a little slop at the end of each block. 
* -------------------------------------------------------------------------- */ lnat @@ -869,14 +761,11 @@ calcAllocated( void ) { nat allocated; bdescr *bd; + nat i; - allocated = allocatedBytes(); - allocated += countNurseryBlocks() * BLOCK_SIZE_W; + allocated = countNurseryBlocks() * BLOCK_SIZE_W; - { -#ifdef THREADED_RTS - nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { Capability *cap; for ( bd = capabilities[i].r.rCurrentNursery->link; bd != NULL; bd = bd->link ) { @@ -888,80 +777,100 @@ calcAllocated( void ) allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) - cap->r.rCurrentNursery->free; } + if (cap->pinned_object_block != NULL) { + allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) - + cap->pinned_object_block->free; + } } -#else - bdescr *current_nursery = MainCapability.r.rCurrentNursery; - for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) { - allocated -= (current_nursery->start + BLOCK_SIZE_W) - - current_nursery->free; - } -#endif - } + allocated += g0->n_new_large_blocks * BLOCK_SIZE_W; - total_allocated += allocated; return allocated; } /* Approximate the amount of live data in the heap. To be called just * after garbage collection (see GarbageCollect()). */ -extern lnat -calcLive(void) +lnat calcLiveBlocks (void) { - nat g, s; + nat g; lnat live = 0; - step *stp; - - if (RtsFlags.GcFlags.generations == 1) { - return (g0s0->n_large_blocks + g0s0->n_blocks) * BLOCK_SIZE_W; - } + generation *gen; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { /* approximate amount of live data (doesn't take into account slop * at end of each block). */ - if (g == 0 && s == 0) { - continue; - } - stp = &generations[g].steps[s]; - live += (stp->n_large_blocks + stp->n_blocks) * BLOCK_SIZE_W; - } + gen = &generations[g]; + live += gen->n_large_blocks + gen->n_blocks; } return live; } +lnat countOccupied (bdescr *bd) +{ + lnat words; + + words = 0; + for (; bd != NULL; bd = bd->link) { + ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W); + words += bd->free - bd->start; + } + return words; +} + +// Return an accurate count of the live data in the heap, excluding +// generation 0. +lnat calcLiveWords (void) +{ + nat g; + lnat live; + generation *gen; + + live = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + gen = &generations[g]; + live += gen->n_words + countOccupied(gen->large_objects); + } + return live; +} + /* Approximate the number of blocks that will be needed at the next * garbage collection. * - * Assume: all data currently live will remain live. Steps that will - * be collected next time will therefore need twice as many blocks - * since all the data will be copied. + * Assume: all data currently live will remain live. Generationss + * that will be collected next time will therefore need twice as many + * blocks since all the data will be copied. 
*/ extern lnat calcNeeded(void) { lnat needed = 0; - nat g, s; - step *stp; + nat g; + generation *gen; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) { continue; } - stp = &generations[g].steps[s]; - if (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - > generations[g].max_blocks - && stp->is_compacted == 0) { - needed += 2 * stp->n_blocks; - } else { - needed += stp->n_blocks; - } - } + gen = &generations[g]; + + // we need at least this much space + needed += gen->n_blocks + gen->n_large_blocks; + + // any additional space needed to collect this gen next time? + if (g == 0 || // always collect gen 0 + (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) { + // we will collect this gen next time + if (gen->mark) { + // bitmap: + needed += gen->n_blocks / BITS_IN(W_); + // mark stack: + needed += gen->n_blocks / 100; + } + if (gen->compact) { + continue; // no additional space needed for compaction + } else { + needed += gen->n_blocks; + } + } } return needed; } @@ -979,11 +888,44 @@ calcNeeded(void) in the page, and when the page is emptied (all objects on the page are free) we free the page again, not forgetting to make it non-executable. + + TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that + the linker cannot use allocateExec for loading object code files + on Windows. Once allocateExec can handle larger objects, the linker + should be modified to use allocateExec instead of VirtualAlloc. ------------------------------------------------------------------------- */ -static bdescr *exec_block; +#if defined(linux_HOST_OS) + +// On Linux we need to use libffi for allocating executable memory, +// because it knows how to work around the restrictions put in place +// by SELinux. + +void *allocateExec (nat bytes, void **exec_ret) +{ + void **ret, **exec; + ACQUIRE_SM_LOCK; + ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec); + RELEASE_SM_LOCK; + if (ret == NULL) return ret; + *ret = ret; // save the address of the writable mapping, for freeExec(). + *exec_ret = exec + 1; + return (ret + 1); +} -void *allocateExec (nat bytes) +// freeExec gets passed the executable address, not the writable address. +void freeExec (void *addr) +{ + void *writable; + writable = *((void**)addr - 1); + ACQUIRE_SM_LOCK; + ffi_closure_free (writable); + RELEASE_SM_LOCK +} + +#else + +void *allocateExec (nat bytes, void **exec_ret) { void *ret; nat n; @@ -1019,6 +961,7 @@ void *allocateExec (nat bytes) exec_block->free += n + 1; RELEASE_SM_LOCK + *exec_ret = ret; return ret; } @@ -1040,210 +983,26 @@ void freeExec (void *addr) bd->gen_no -= *(StgPtr)p; *(StgPtr)p = 0; - // Free the block if it is empty, but not if it is the block at - // the head of the queue. - if (bd->gen_no == 0 && bd != exec_block) { - debugTrace(DEBUG_gc, "free exec block %p", bd->start); - if (bd->u.back) { - bd->u.back->link = bd->link; - } else { - exec_block = bd->link; - } - if (bd->link) { - bd->link->u.back = bd->u.back; - } - setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); - freeGroup(bd); + if (bd->gen_no == 0) { + // Free the block if it is empty, but not if it is the block at + // the head of the queue. 
+ if (bd != exec_block) { + debugTrace(DEBUG_gc, "free exec block %p", bd->start); + dbl_link_remove(bd, &exec_block); + setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); + freeGroup(bd); + } else { + bd->free = bd->start; + } } RELEASE_SM_LOCK } -/* ----------------------------------------------------------------------------- - Debugging - - memInventory() checks for memory leaks by counting up all the - blocks we know about and comparing that to the number of blocks - allegedly floating around in the system. - -------------------------------------------------------------------------- */ +#endif /* mingw32_HOST_OS */ #ifdef DEBUG -static lnat -stepBlocks (step *stp) -{ - lnat total_blocks; - bdescr *bd; - - total_blocks = stp->n_blocks; - total_blocks += stp->n_old_blocks; - for (bd = stp->large_objects; bd; bd = bd->link) { - total_blocks += bd->blocks; - /* hack for megablock groups: they have an extra block or two in - the second and subsequent megablocks where the block - descriptors would normally go. - */ - if (bd->blocks > BLOCKS_PER_MBLOCK) { - total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK) - * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE)); - } - } - return total_blocks; -} - -void -memInventory(void) -{ - nat g, s, i; - step *stp; - bdescr *bd; - lnat gen_blocks[RtsFlags.GcFlags.generations]; - lnat nursery_blocks, allocate_blocks, retainer_blocks, - arena_blocks, exec_blocks; - lnat live_blocks = 0, free_blocks = 0; - - // count the blocks we current have - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen_blocks[g] = 0; - for (i = 0; i < n_capabilities; i++) { - for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) { - gen_blocks[g] += bd->blocks; - } - } - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - gen_blocks[g] += bd->blocks; - } - for (s = 0; s < generations[g].n_steps; s++) { - if (g==0 && s==0) continue; - stp = &generations[g].steps[s]; - gen_blocks[g] += stepBlocks(stp); - } - } - - nursery_blocks = 0; - for (i = 0; i < n_nurseries; i++) { - nursery_blocks += stepBlocks(&nurseries[i]); - } -#ifdef THREADED_RTS - // We put pinned object blocks in g0s0, so better count blocks there too. 
- gen_blocks[0] += stepBlocks(g0s0); -#endif - - /* any blocks held by allocate() */ - allocate_blocks = 0; - for (bd = small_alloc_list; bd; bd = bd->link) { - allocate_blocks += bd->blocks; - } - - retainer_blocks = 0; -#ifdef PROFILING - if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { - retainer_blocks = retainerStackBlocks(); - } -#endif - - // count the blocks allocated by the arena allocator - arena_blocks = arenaBlocks(); - - // count the blocks containing executable memory - exec_blocks = 0; - for (bd = exec_block; bd; bd = bd->link) { - exec_blocks += bd->blocks; - } - - /* count the blocks on the free list */ - free_blocks = countFreeList(); - - live_blocks = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - live_blocks += gen_blocks[g]; - } - live_blocks += nursery_blocks + allocate_blocks - + retainer_blocks + arena_blocks + exec_blocks; - - if (live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK) - { - debugBelch("Memory leak detected\n"); - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - debugBelch(" gen %d blocks : %4lu\n", g, gen_blocks[g]); - } - debugBelch(" nursery : %4lu\n", nursery_blocks); - debugBelch(" allocate() : %4lu\n", allocate_blocks); - debugBelch(" retainer : %4lu\n", retainer_blocks); - debugBelch(" arena blocks : %4lu\n", arena_blocks); - debugBelch(" exec : %4lu\n", exec_blocks); - debugBelch(" free : %4lu\n", free_blocks); - debugBelch(" total : %4lu\n\n", live_blocks + free_blocks); - debugBelch(" in system : %4lu\n", mblocks_allocated + BLOCKS_PER_MBLOCK); - ASSERT(0); - } -} - - -nat -countBlocks(bdescr *bd) -{ - nat n; - for (n=0; bd != NULL; bd=bd->link) { - n += bd->blocks; - } - return n; -} - -/* Full heap sanity check. */ -void -checkSanity( void ) -{ - nat g, s; - - if (RtsFlags.GcFlags.generations == 1) { - checkHeap(g0s0->blocks); - checkChain(g0s0->large_objects); - } else { - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) { continue; } - ASSERT(countBlocks(generations[g].steps[s].blocks) - == generations[g].steps[s].n_blocks); - ASSERT(countBlocks(generations[g].steps[s].large_objects) - == generations[g].steps[s].n_large_blocks); - checkHeap(generations[g].steps[s].blocks); - checkChain(generations[g].steps[s].large_objects); - if (g > 0) { - checkMutableList(generations[g].mut_list, g); - } - } - } - - for (s = 0; s < n_nurseries; s++) { - ASSERT(countBlocks(nurseries[s].blocks) - == nurseries[s].n_blocks); - ASSERT(countBlocks(nurseries[s].large_objects) - == nurseries[s].n_large_blocks); - } - - checkFreeListSanity(); - } -} - -/* Nursery sanity check */ -void -checkNurserySanity( step *stp ) -{ - bdescr *bd, *prev; - nat blocks = 0; - - prev = NULL; - for (bd = stp->blocks; bd != NULL; bd = bd->link) { - ASSERT(bd->u.back == prev); - prev = bd; - blocks += bd->blocks; - } - ASSERT(blocks == stp->n_blocks); -} - // handy function for use in gdb, because Bdescr() is inlined. extern bdescr *_bdescr( StgPtr p );
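
For reference, below is a minimal sketch of how RTS code might use the per-capability allocate() interface that this patch introduces (replacing the old global allocate(nat)/allocateLocal() pair). It is illustrative only and not part of the patch: allocateByteArray is a hypothetical helper, modelled on the removed stgAllocForGMP code above, and it assumes the caller already holds a Capability, as RTS code normally does.

/* Hypothetical helper (not in the patch): allocate an ARR_WORDS object
 * with size_in_bytes bytes of payload from the given capability's
 * nursery, using the new allocate(Capability *, lnat) entry point. */
static StgPtr
allocateByteArray (Capability *cap, size_t size_in_bytes)
{
    StgArrWords *arr;
    lnat data_words, total_words;

    /* round the payload up to a whole number of words */
    data_words  = (size_in_bytes + sizeof(W_) - 1) / sizeof(W_);
    total_words = sizeofW(StgArrWords) + data_words;

    /* allocate() always succeeds; requests of at least
     * LARGE_OBJECT_THRESHOLD bytes go straight onto the generation-0
     * large_objects list, smaller ones come from cap's current
     * nursery block. */
    arr = (StgArrWords *)allocate(cap, total_words);
    SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_words);

    return (StgPtr)arr->payload;
}

Because allocation now goes through the Capability, the common case takes no sm_mutex at all; the lock is only acquired inside allocate() when a fresh nursery block or a large-object block group has to be obtained from the block allocator.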