X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FStorage.c;h=4933854049a353ff5abd43b8fc7b0230f380d730;hb=2322bc9a89a9d8a6132a6818ccff6f665d7ed7f1;hp=770b43a82219e57f247deaa7571b202b9f9071d6;hpb=95ca6bff6fc9918203173b442192d9298ef9757a;p=ghc-hetmet.git

diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c
index 770b43a..4933854 100644
--- a/ghc/rts/Storage.c
+++ b/ghc/rts/Storage.c
@@ -17,17 +17,21 @@
 #include "Weak.h"
 #include "Sanity.h"
 #include "Arena.h"
-
+#include "OSThreads.h"
+#include "Capability.h"
 #include "Storage.h"
 #include "Schedule.h"
-#include "OSThreads.h"
-
 #include "RetainerProfile.h"	// for counting memory blocks (memInventory)
 
 #include <stdlib.h>
 #include <string.h>
 
+/*
+ * All these globals require sm_mutex to access in THREADED_RTS mode.
+ */
 StgClosure    *caf_list         = NULL;
+StgClosure    *revertible_caf_list = NULL;
+rtsBool       keepCAFs;
 
 bdescr *small_alloc_list;	/* allocate()d small objects */
 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
@@ -44,14 +48,22 @@ step *g0s0 = NULL; /* generation 0, step 0, for convenience */
 
 ullong total_allocated = 0;	/* total memory allocated during run */
 
+nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
+step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
+
+#ifdef THREADED_RTS
 /*
  * Storage manager mutex:  protects all the above state from
  * simultaneous access by two STG threads.
  */
-#ifdef SMP
-Mutex sm_mutex = INIT_MUTEX_VAR;
+Mutex sm_mutex;
+/*
+ * This mutex is used by atomicModifyMutVar# only
+ */
+Mutex atomic_modify_mutvar_mutex;
 #endif
 
+
 /*
  * Forward references
  */
@@ -59,11 +71,36 @@ static void *stgAllocForGMP (size_t size_in_bytes);
 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
 static void  stgDeallocForGMP (void *ptr, size_t size);
 
+static void
+initStep (step *stp, int g, int s)
+{
+    stp->no = s;
+    stp->blocks = NULL;
+    stp->n_blocks = 0;
+    stp->old_blocks = NULL;
+    stp->n_old_blocks = 0;
+    stp->gen = &generations[g];
+    stp->gen_no = g;
+    stp->hp = NULL;
+    stp->hpLim = NULL;
+    stp->hp_bd = NULL;
+    stp->scavd_hp = NULL;
+    stp->scavd_hpLim = NULL;
+    stp->scan = NULL;
+    stp->scan_bd = NULL;
+    stp->large_objects = NULL;
+    stp->n_large_blocks = 0;
+    stp->new_large_objects = NULL;
+    stp->scavenged_large_objects = NULL;
+    stp->n_scavenged_large_blocks = 0;
+    stp->is_compacted = 0;
+    stp->bitmap = NULL;
+}
+
 void
 initStorage( void )
 {
   nat g, s;
-  step *stp;
   generation *gen;
 
   if (generations != NULL) {
@@ -93,21 +130,23 @@ initStorage( void )
 
   initBlockAllocator();
 
-#if defined(SMP)
+#if defined(THREADED_RTS)
   initMutex(&sm_mutex);
+  initMutex(&atomic_modify_mutvar_mutex);
 #endif
 
+  ACQUIRE_SM_LOCK;
+
   /* allocate generation info array */
   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
-					     * sizeof(struct _generation),
+					     * sizeof(struct generation_),
 					     "initStorage: gens");
 
   /* Initialise all generations */
   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
     gen = &generations[g];
     gen->no = g;
-    gen->mut_list = END_MUT_LIST;
-    gen->mut_once_list = END_MUT_LIST;
+    gen->mut_list = allocBlock();
     gen->collections = 0;
     gen->failed_promotions = 0;
     gen->max_blocks = 0;
@@ -124,47 +163,44 @@ initStorage( void )
       /* Oldest generation: one step */
       oldest_gen->n_steps = 1;
       oldest_gen->steps =
-       stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");
+       stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
 
       /* set up all except the oldest generation with 2 steps */
       for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
	generations[g].n_steps = RtsFlags.GcFlags.steps;
	generations[g].steps  =
-	  stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
+	  stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
			  "initStorage: steps");
       }
 
   } else {
       /* single generation, i.e. a two-space collector */
       g0->n_steps = 1;
-      g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
+      g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
   }
 
+#ifdef THREADED_RTS
+  n_nurseries = n_capabilities;
+  nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
+			      "initStorage: nurseries");
+#else
+  n_nurseries = 1;
+  nurseries = g0->steps; // just share nurseries[0] with g0s0
+#endif
+
   /* Initialise all steps */
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
     for (s = 0; s < generations[g].n_steps; s++) {
-      stp = &generations[g].steps[s];
-      stp->no = s;
-      stp->blocks = NULL;
-      stp->n_to_blocks = 0;
-      stp->n_blocks = 0;
-      stp->gen = &generations[g];
-      stp->gen_no = g;
-      stp->hp = NULL;
-      stp->hpLim = NULL;
-      stp->hp_bd = NULL;
-      stp->scan = NULL;
-      stp->scan_bd = NULL;
-      stp->large_objects = NULL;
-      stp->n_large_blocks = 0;
-      stp->new_large_objects = NULL;
-      stp->scavenged_large_objects = NULL;
-      stp->n_scavenged_large_blocks = 0;
-      stp->is_compacted = 0;
-      stp->bitmap = NULL;
+	initStep(&generations[g].steps[s], g, s);
     }
   }
 
+#ifdef THREADED_RTS
+  for (s = 0; s < n_nurseries; s++) {
+      initStep(&nurseries[s], 0, s);
+  }
+#endif
+
   /* Set up the destination pointers in each younger gen. step */
   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
     for (s = 0; s < generations[g].n_steps-1; s++) {
@@ -172,8 +208,15 @@ initStorage( void )
     }
     generations[g].steps[s].to = &generations[g+1].steps[0];
   }
+  oldest_gen->steps[0].to = &oldest_gen->steps[0];
+
+#ifdef THREADED_RTS
+  for (s = 0; s < n_nurseries; s++) {
+      nurseries[s].to = generations[0].steps[0].to;
+  }
+#endif
 
-  /* The oldest generation has one step and it is compacted. */
+  /* The oldest generation has one step. */
   if (RtsFlags.GcFlags.compact) {
       if (RtsFlags.GcFlags.generations == 1) {
	  errorBelch("WARNING: compaction is incompatible with -G1; disabled");
@@ -181,7 +224,13 @@ initStorage( void )
	  oldest_gen->steps[0].is_compacted = 1;
       }
   }
-  oldest_gen->steps[0].to = &oldest_gen->steps[0];
+
+#ifdef THREADED_RTS
+  if (RtsFlags.GcFlags.generations == 1) {
+      errorBelch("-G1 is incompatible with -threaded");
+      stg_exit(EXIT_FAILURE);
+  }
+#endif
 
   /* generation 0 is special: that's the nursery */
   generations[0].max_blocks = 0;
@@ -198,6 +247,7 @@ initStorage( void )
 
   weak_ptr_list = NULL;
   caf_list = NULL;
+  revertible_caf_list = NULL;
 
   /* initialise the allocate() interface */
   small_alloc_list = NULL;
@@ -208,6 +258,8 @@ initStorage( void )
   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 
   IF_DEBUG(gc, statDescribeGens());
+
+  RELEASE_SM_LOCK;
 }
 
 void
@@ -260,19 +312,37 @@ exitStorage (void)
 void
 newCAF(StgClosure* caf)
 {
-  /* Put this CAF on the mutable list for the old generation.
-   * This is a HACK - the IND_STATIC closure doesn't really have
-   * a mut_link field, but we pretend it has - in fact we re-use
-   * the STATIC_LINK field for the time being, because when we
-   * come to do a major GC we won't need the mut_link field
-   * any more and can use it as a STATIC_LINK.
-   */
   ACQUIRE_SM_LOCK;
-  ((StgIndStatic *)caf)->saved_info  = NULL;
-  ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
-  oldest_gen->mut_once_list = (StgMutClosure *)caf;
-
+  if(keepCAFs)
+  {
+    // HACK:
+    // If we are in GHCi _and_ we are using dynamic libraries,
+    // then we can't redirect newCAF calls to newDynCAF (see below),
+    // so we make newCAF behave almost like newDynCAF.
+    // The dynamic libraries might be used by both the interpreted
+    // program and GHCi itself, so they must not be reverted.
+    // This also means that in GHCi with dynamic libraries, CAFs are not
+    // garbage collected. If this turns out to be a problem, we could
+    // do another hack here and do an address range test on caf to figure
+    // out whether it is from a dynamic library.
+    ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
+    ((StgIndStatic *)caf)->static_link = caf_list;
+    caf_list = caf;
+  }
+  else
+  {
+    /* Put this CAF on the mutable list for the old generation.
+     * This is a HACK - the IND_STATIC closure doesn't really have
+     * a mut_link field, but we pretend it has - in fact we re-use
+     * the STATIC_LINK field for the time being, because when we
+     * come to do a major GC we won't need the mut_link field
+     * any more and can use it as a STATIC_LINK.
+     */
+    ((StgIndStatic *)caf)->saved_info = NULL;
+    recordMutableGen(caf, oldest_gen);
+  }
+
   RELEASE_SM_LOCK;
 
 #ifdef PAR
@@ -289,6 +359,8 @@ newCAF(StgClosure* caf)
 // object code in GHCi.  In this case we want to retain *all* CAFs in
 // the object code, because they might be demanded at any time from an
 // expression evaluated on the command line.
+// Also, GHCi might want to revert CAFs, so we add these to the
+// revertible_caf_list.
 //
 // The linker hackily arranges that references to newCaf from dynamic
 // code end up pointing to newDynCAF.
@@ -298,8 +370,8 @@ newDynCAF(StgClosure *caf)
     ACQUIRE_SM_LOCK;
 
     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
-    ((StgIndStatic *)caf)->static_link = caf_list;
-    caf_list = caf;
+    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
+    revertible_caf_list = caf;
 
     RELEASE_SM_LOCK;
 }
@@ -308,127 +380,125 @@ newDynCAF(StgClosure *caf)
    Nursery management.
   -------------------------------------------------------------------------- */
 
+static bdescr *
+allocNursery (step *stp, bdescr *tail, nat blocks)
+{
+    bdescr *bd;
+    nat i;
+
+    // Allocate a nursery: we allocate fresh blocks one at a time and
+    // cons them on to the front of the list, not forgetting to update
+    // the back pointer on the tail of the list to point to the new block.
+    for (i=0; i < blocks; i++) {
+	// @LDV profiling
+	/*
+	  processNursery() in LdvProfile.c assumes that every block group in
+	  the nursery contains only a single block. So, if a block group is
+	  given multiple blocks, change processNursery() accordingly.
+	*/
+	bd = allocBlock();
+	bd->link = tail;
+	// double-link the nursery: we might need to insert blocks
+	if (tail != NULL) {
+	    tail->u.back = bd;
+	}
+	bd->step = stp;
+	bd->gen_no = 0;
+	bd->flags = 0;
+	bd->free = bd->start;
+	tail = bd;
+    }
+    tail->u.back = NULL;
+    return tail;
+}
+
+static void
+assignNurseriesToCapabilities (void)
+{
+#ifdef THREADED_RTS
+    nat i;
+
+    for (i = 0; i < n_nurseries; i++) {
+	capabilities[i].r.rNursery        = &nurseries[i];
+	capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
+	capabilities[i].r.rCurrentAlloc   = NULL;
+    }
+#else /* THREADED_RTS */
+    MainCapability.r.rNursery        = &nurseries[0];
+    MainCapability.r.rCurrentNursery = nurseries[0].blocks;
+    MainCapability.r.rCurrentAlloc   = NULL;
+#endif
+}
+
 void
 allocNurseries( void )
 {
-#ifdef SMP
-  Capability *cap;
-  bdescr *bd;
-
-  g0s0->blocks = NULL;
-  g0s0->n_blocks = 0;
-  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
-    cap->r.rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
-    cap->r.rCurrentNursery = cap->r.rNursery;
-    /* Set the back links to be equal to the Capability,
-     * so we can do slightly better informed locking.
-     */
-    for (bd = cap->r.rNursery; bd != NULL; bd = bd->link) {
-      bd->u.back = (bdescr *)cap;
+    nat i;
+
+    for (i = 0; i < n_nurseries; i++) {
+	nurseries[i].blocks =
+	    allocNursery(&nurseries[i], NULL,
+			 RtsFlags.GcFlags.minAllocAreaSize);
+	nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
+	nurseries[i].old_blocks = NULL;
+	nurseries[i].n_old_blocks = 0;
+	/* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
     }
-  }
-#else /* SMP */
-  g0s0->blocks      = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
-  g0s0->n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
-  g0s0->to_blocks   = NULL;
-  g0s0->n_to_blocks = 0;
-  MainCapability.r.rNursery        = g0s0->blocks;
-  MainCapability.r.rCurrentNursery = g0s0->blocks;
-  /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
-#endif
+    assignNurseriesToCapabilities();
 }
 
 void
 resetNurseries( void )
 {
-  bdescr *bd;
-#ifdef SMP
-  Capability *cap;
-
-  /* All tasks must be stopped */
-  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);
-
-  for (cap = free_capabilities; cap != NULL; cap = cap->link) {
-    for (bd = cap->r.rNursery; bd; bd = bd->link) {
-      bd->free = bd->start;
-      ASSERT(bd->gen_no == 0);
-      ASSERT(bd->step == g0s0);
-      IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
+    nat i;
+    bdescr *bd;
+    step *stp;
+
+    for (i = 0; i < n_nurseries; i++) {
+	stp = &nurseries[i];
+	for (bd = stp->blocks; bd; bd = bd->link) {
+	    bd->free = bd->start;
+	    ASSERT(bd->gen_no == 0);
+	    ASSERT(bd->step == stp);
+	    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
+	}
     }
-    cap->r.rCurrentNursery = cap->r.rNursery;
-  }
-#else
-  for (bd = g0s0->blocks; bd; bd = bd->link) {
-    bd->free = bd->start;
-    ASSERT(bd->gen_no == 0);
-    ASSERT(bd->step == g0s0);
-    IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
-  }
-  MainCapability.r.rNursery = g0s0->blocks;
-  MainCapability.r.rCurrentNursery = g0s0->blocks;
-#endif
+    assignNurseriesToCapabilities();
 }
 
-bdescr *
-allocNursery (bdescr *tail, nat blocks)
+lnat
+countNurseryBlocks (void)
 {
-  bdescr *bd;
-  nat i;
+    nat i;
+    lnat blocks = 0;
 
-  // Allocate a nursery: we allocate fresh blocks one at a time and
-  // cons them on to the front of the list, not forgetting to update
-  // the back pointer on the tail of the list to point to the new block.
-  for (i=0; i < blocks; i++) {
-    // @LDV profiling
-    /*
-      processNursery() in LdvProfile.c assumes that every block group in
-      the nursery contains only a single block. So, if a block group is
-      given multiple blocks, change processNursery() accordingly.
-     */
-    bd = allocBlock();
-    bd->link = tail;
-    // double-link the nursery: we might need to insert blocks
-    if (tail != NULL) {
-      tail->u.back = bd;
+    for (i = 0; i < n_nurseries; i++) {
+	blocks += nurseries[i].n_blocks;
     }
-    bd->step = g0s0;
-    bd->gen_no = 0;
-    bd->flags = 0;
-    bd->free = bd->start;
-    tail = bd;
-  }
-  tail->u.back = NULL;
-  return tail;
+    return blocks;
 }
 
-void
-resizeNursery ( nat blocks )
+static void
+resizeNursery ( step *stp, nat blocks )
 {
   bdescr *bd;
   nat nursery_blocks;
 
-#ifdef SMP
-  barf("resizeNursery: can't resize in SMP mode");
-#endif
+  nursery_blocks = stp->n_blocks;
+  if (nursery_blocks == blocks) return;
 
-  nursery_blocks = g0s0->n_blocks;
-  if (nursery_blocks == blocks) {
-    return;
-  }
-
-  else if (nursery_blocks < blocks) {
+  if (nursery_blocks < blocks) {
     IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
			    blocks));
-    g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
+    stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
   }
-
   else {
     bdescr *next_bd;
 
     IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
			    blocks));
 
-    bd = g0s0->blocks;
+    bd = stp->blocks;
     while (nursery_blocks > blocks) {
	next_bd = bd->link;
	next_bd->u.back = NULL;
@@ -436,16 +506,39 @@ resizeNursery ( nat blocks )
	freeGroup(bd);
	bd = next_bd;
     }
-    g0s0->blocks = bd;
+    stp->blocks = bd;
     // might have gone just under, by freeing a large block, so make
     // up the difference.
     if (nursery_blocks < blocks) {
-	g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
+	stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
     }
   }
 
-  g0s0->n_blocks = blocks;
-  ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
+  stp->n_blocks = blocks;
+  ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
+}
+
+//
+// Resize each of the nurseries to the specified size.
+//
+void
+resizeNurseriesFixed (nat blocks)
+{
+    nat i;
+    for (i = 0; i < n_nurseries; i++) {
+	resizeNursery(&nurseries[i], blocks);
+    }
+}
+
+//
+// Resize the nurseries to the total specified size.
+//
+void
+resizeNurseries (nat blocks)
+{
+    // If there are multiple nurseries, then we just divide the number
+    // of available blocks between them.
+    resizeNurseriesFixed(blocks / n_nurseries);
 }
 
 /* -----------------------------------------------------------------------------
@@ -459,49 +552,49 @@ resizeNursery ( nat blocks )
 StgPtr
 allocate( nat n )
 {
-  bdescr *bd;
-  StgPtr p;
+    bdescr *bd;
+    StgPtr p;
 
-  ACQUIRE_SM_LOCK;
+    ACQUIRE_SM_LOCK;
 
-  TICK_ALLOC_HEAP_NOCTR(n);
-  CCS_ALLOC(CCCS,n);
-
-  /* big allocation (>LARGE_OBJECT_THRESHOLD) */
-  /* ToDo: allocate directly into generation 1 */
-  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
-    nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
-    bd = allocGroup(req_blocks);
-    dbl_link_onto(bd, &g0s0->large_objects);
-    g0s0->n_large_blocks += req_blocks;
-    bd->gen_no  = 0;
-    bd->step = g0s0;
-    bd->flags = BF_LARGE;
-    bd->free = bd->start + n;
-    alloc_blocks += req_blocks;
-    RELEASE_SM_LOCK;
-    return bd->start;
+    TICK_ALLOC_HEAP_NOCTR(n);
+    CCS_ALLOC(CCCS,n);
 
-  /* small allocation (<LARGE_OBJECT_THRESHOLD) */
-  } else if (alloc_Hp + n > alloc_HpLim) {
-    if (small_alloc_list) {
-      small_alloc_list->free = alloc_Hp;
+    /* big allocation (>LARGE_OBJECT_THRESHOLD) */
+    /* ToDo: allocate directly into generation 1 */
+    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+	nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+	bd = allocGroup(req_blocks);
+	dbl_link_onto(bd, &g0s0->large_objects);
+	g0s0->n_large_blocks += req_blocks;
+	bd->gen_no  = 0;
+	bd->step = g0s0;
+	bd->flags = BF_LARGE;
+	bd->free = bd->start + n;
+	alloc_blocks += req_blocks;
+	RELEASE_SM_LOCK;
+	return bd->start;
+
+	/* small allocation (<LARGE_OBJECT_THRESHOLD) */
+    } else if (alloc_Hp + n > alloc_HpLim) {
+	if (small_alloc_list) {
+	    small_alloc_list->free = alloc_Hp;
+	}
+	bd = allocBlock();
+	bd->link = small_alloc_list;
+	small_alloc_list = bd;
+	bd->gen_no = 0;
+	bd->step = g0s0;
+	bd->flags = 0;
+	alloc_Hp = bd->start;
+	alloc_HpLim = bd->start + BLOCK_SIZE_W;
+	alloc_blocks++;
     }
-    bd = allocBlock();
-    bd->link = small_alloc_list;
-    small_alloc_list = bd;
-    bd->gen_no = 0;
-    bd->step = g0s0;
-    bd->flags = 0;
-    alloc_Hp = bd->start;
-    alloc_HpLim = bd->start + BLOCK_SIZE_W;
-    alloc_blocks++;
-  }
-
-  p = alloc_Hp;
-  alloc_Hp += n;
-  RELEASE_SM_LOCK;
-  return p;
+
+    p = alloc_Hp;
+    alloc_Hp += n;
+    RELEASE_SM_LOCK;
+    return p;
 }
 
 lnat
@@ -528,6 +621,84 @@ tidyAllocateLists (void)
   }
 }
 
+/* -----------------------------------------------------------------------------
+   allocateLocal()
+
+   This allocates memory in the current thread - it is intended for
+   use primarily from STG-land where we have a Capability.  It is
+   better than allocate() because it doesn't require taking the
+   sm_mutex lock in the common case.
+
+   Memory is allocated directly from the nursery if possible (but not
+   from the current nursery block, so as not to interfere with
+   Hp/HpLim).
+ -------------------------------------------------------------------------- */ + +StgPtr +allocateLocal (Capability *cap, nat n) +{ + bdescr *bd; + StgPtr p; + + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + /* big allocation (>LARGE_OBJECT_THRESHOLD) */ + /* ToDo: allocate directly into generation 1 */ + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + ACQUIRE_SM_LOCK; + bd = allocGroup(req_blocks); + dbl_link_onto(bd, &g0s0->large_objects); + g0s0->n_large_blocks += req_blocks; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = BF_LARGE; + bd->free = bd->start + n; + alloc_blocks += req_blocks; + RELEASE_SM_LOCK; + return bd->start; + + /* small allocation (r.rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + cap->r.rNursery->n_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = cap->r.rNursery; + bd->flags = 0; + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + cap->r.rCurrentNursery->link = bd->link; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + dbl_link_onto(bd, &cap->r.rNursery->blocks); + cap->r.rCurrentAlloc = bd; + IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); + } + } + p = bd->free; + bd->free += n; + return p; +} + /* --------------------------------------------------------------------------- Allocate a fixed/pinned object. @@ -594,6 +765,25 @@ allocatePinned( nat n ) } /* ----------------------------------------------------------------------------- + This is the write barrier for MUT_VARs, a.k.a. IORefs. A + MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY + is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY + and is put on the mutable list. + -------------------------------------------------------------------------- */ + +void +dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) +{ + Capability *cap = regTableToCapability(reg); + bdescr *bd; + if (p->header.info == &stg_MUT_VAR_CLEAN_info) { + p->header.info = &stg_MUT_VAR_DIRTY_info; + bd = Bdescr((StgPtr)p); + if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no); + } +} + +/* ----------------------------------------------------------------------------- Allocation functions for GMP. These all use the allocate() interface - we can't have any garbage @@ -615,7 +805,11 @@ stgAllocForGMP (size_t size_in_bytes) total_size_in_words = sizeofW(StgArrWords) + data_size_in_words; /* allocate and fill it in. */ - arr = (StgArrWords *)allocate(total_size_in_words); +#if defined(THREADED_RTS) + arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words); +#else + arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words); +#endif SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words); /* and return a ptr to the goods inside the array */ @@ -663,42 +857,37 @@ calcAllocated( void ) nat allocated; bdescr *bd; -#ifdef SMP - Capability *cap; - - /* All tasks must be stopped. 
Can't assert that all the - capabilities are owned by the scheduler, though: one or more - tasks might have been stopped while they were running (non-main) - threads. */ - /* ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */ - - allocated = - n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W - + allocated_bytes(); - - for (cap = free_capabilities; cap != NULL; cap = cap->link) { - for ( bd = cap->r.rCurrentNursery->link; bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - if (cap->r.rCurrentNursery->free < cap->r.rCurrentNursery->start - + BLOCK_SIZE_W) { - allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) - - cap->r.rCurrentNursery->free; - } + allocated = allocated_bytes(); + allocated += countNurseryBlocks() * BLOCK_SIZE_W; + + { +#ifdef THREADED_RTS + nat i; + for (i = 0; i < n_nurseries; i++) { + Capability *cap; + for ( bd = capabilities[i].r.rCurrentNursery->link; + bd != NULL; bd = bd->link ) { + allocated -= BLOCK_SIZE_W; + } + cap = &capabilities[i]; + if (cap->r.rCurrentNursery->free < + cap->r.rCurrentNursery->start + BLOCK_SIZE_W) { + allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) + - cap->r.rCurrentNursery->free; + } } - -#else /* !SMP */ +#else bdescr *current_nursery = MainCapability.r.rCurrentNursery; - allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes(); for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; + allocated -= BLOCK_SIZE_W; } if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) { - allocated -= (current_nursery->start + BLOCK_SIZE_W) - - current_nursery->free; + allocated -= (current_nursery->start + BLOCK_SIZE_W) + - current_nursery->free; } #endif + } total_allocated += allocated; return allocated; @@ -715,7 +904,7 @@ calcLive(void) step *stp; if (RtsFlags.GcFlags.generations == 1) { - live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W + + live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W + ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_); return live; } @@ -734,6 +923,9 @@ calcLive(void) live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start) / sizeof(W_); } + if (stp->scavd_hp != NULL) { + live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp; + } } } return live; @@ -780,10 +972,32 @@ calcNeeded(void) #ifdef DEBUG +static lnat +stepBlocks (step *stp) +{ + lnat total_blocks; + bdescr *bd; + + total_blocks = stp->n_blocks; + total_blocks += stp->n_old_blocks; + for (bd = stp->large_objects; bd; bd = bd->link) { + total_blocks += bd->blocks; + /* hack for megablock groups: they have an extra block or two in + the second and subsequent megablocks where the block + descriptors would normally go. 
+	*/
+	if (bd->blocks > BLOCKS_PER_MBLOCK) {
+	    total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
		* (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
+	}
+    }
+    return total_blocks;
+}
+
 void
 memInventory(void)
 {
-  nat g, s;
+  nat g, s, i;
  step *stp;
  bdescr *bd;
  lnat total_blocks = 0, free_blocks = 0;
@@ -791,27 +1005,29 @@ memInventory(void)
  /* count the blocks we current have */
 
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-    for (s = 0; s < generations[g].n_steps; s++) {
-      stp = &generations[g].steps[s];
-      total_blocks += stp->n_blocks;
-      if (RtsFlags.GcFlags.generations == 1) {
-	/* two-space collector has a to-space too :-) */
-	total_blocks += g0s0->n_to_blocks;
+      for (i = 0; i < n_capabilities; i++) {
+	  for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
+	      total_blocks += bd->blocks;
+	  }
+      }
+      for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+	  total_blocks += bd->blocks;
      }
-      for (bd = stp->large_objects; bd; bd = bd->link) {
-	total_blocks += bd->blocks;
-	/* hack for megablock groups: they have an extra block or two in
	   the second and subsequent megablocks where the block
	   descriptors would normally go.
-	*/
-	if (bd->blocks > BLOCKS_PER_MBLOCK) {
-	  total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
	                  * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
-	}
+      for (s = 0; s < generations[g].n_steps; s++) {
+	  if (g==0 && s==0) continue;
+	  stp = &generations[g].steps[s];
+	  total_blocks += stepBlocks(stp);
      }
-    }
  }
 
+  for (i = 0; i < n_nurseries; i++) {
+      total_blocks += stepBlocks(&nurseries[i]);
+  }
+#ifdef THREADED_RTS
+  // We put pinned object blocks in g0s0, so better count blocks there too.
+  total_blocks += stepBlocks(g0s0);
+#endif
+
  /* any blocks held by allocate() */
  for (bd = small_alloc_list; bd; bd = bd->link) {
    total_blocks += bd->blocks;
@@ -857,29 +1073,52 @@ checkSanity( void )
    nat g, s;
 
    if (RtsFlags.GcFlags.generations == 1) {
-	checkHeap(g0s0->to_blocks);
+	checkHeap(g0s0->blocks);
	checkChain(g0s0->large_objects);
    } else {
 
	for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	    for (s = 0; s < generations[g].n_steps; s++) {
+		if (g == 0 && s == 0) { continue; }
		ASSERT(countBlocks(generations[g].steps[s].blocks)
		       == generations[g].steps[s].n_blocks);
		ASSERT(countBlocks(generations[g].steps[s].large_objects)
		       == generations[g].steps[s].n_large_blocks);
-		if (g == 0 && s == 0) { continue; }
		checkHeap(generations[g].steps[s].blocks);
		checkChain(generations[g].steps[s].large_objects);
		if (g > 0) {
		    checkMutableList(generations[g].mut_list, g);
-		    checkMutOnceList(generations[g].mut_once_list, g);
		}
	    }
	}
+
+	for (s = 0; s < n_nurseries; s++) {
+	    ASSERT(countBlocks(nurseries[s].blocks)
		   == nurseries[s].n_blocks);
+	    ASSERT(countBlocks(nurseries[s].large_objects)
		   == nurseries[s].n_large_blocks);
+	}
+
	checkFreeListSanity();
    }
 }
 
+/* Nursery sanity check */
+void
+checkNurserySanity( step *stp )
+{
+    bdescr *bd, *prev;
+    nat blocks = 0;
+
+    prev = NULL;
+    for (bd = stp->blocks; bd != NULL; bd = bd->link) {
+	ASSERT(bd->u.back == prev);
+	prev = bd;
+	blocks += bd->blocks;
+    }
+    ASSERT(blocks == stp->n_blocks);
+}
+
 // handy function for use in gdb, because Bdescr() is inlined.
 extern bdescr *_bdescr( StgPtr p );
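
The central idea in the allocateLocal() change above is that each Capability bump-allocates out of a block it owns exclusively, and only takes the storage-manager lock on the slow path when it needs a fresh block. The following standalone C sketch models that fast-path/slow-path split under stated assumptions: it is not RTS code, and the names Block, Cap, BLOCK_WORDS, global_lock, new_block and alloc_local are all invented for illustration (a pthread mutex stands in for sm_mutex, and freed blocks are simply leaked).

/* Toy model of per-capability bump-pointer allocation with a locked
 * slow path, in the spirit of allocateLocal() above. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define BLOCK_WORDS 512                  /* stand-in for BLOCK_SIZE_W */

typedef struct Block_ {
    long  words[BLOCK_WORDS];
    long *free;                          /* next free word, like bd->free */
    struct Block_ *link;
} Block;

typedef struct {
    Block *current;                      /* like cap->r.rCurrentAlloc */
} Cap;

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

/* Slow path: take the global lock and hand out a fresh block, mirroring
 * the ACQUIRE_SM_LOCK / allocBlock() / RELEASE_SM_LOCK sequence above. */
static Block *new_block(void)
{
    pthread_mutex_lock(&global_lock);
    Block *b = malloc(sizeof(Block));
    b->free = b->words;
    b->link = NULL;
    pthread_mutex_unlock(&global_lock);
    return b;
}

/* Fast path: bump the per-capability pointer; no lock is needed because
 * each capability owns its current block exclusively. */
static long *alloc_local(Cap *cap, int n)
{
    Block *b = cap->current;
    if (b == NULL || b->free + n > b->words + BLOCK_WORDS) {
        b = new_block();                 /* current block full: refill */
        b->link = cap->current;
        cap->current = b;
    }
    long *p = b->free;
    b->free += n;
    return p;
}

int main(void)
{
    Cap cap = { NULL };
    for (int i = 0; i < 2000; i++) {
        long *p = alloc_local(&cap, 3);
        p[0] = i;                        /* use the space */
    }
    printf("allocated 2000 objects of 3 words each\n");
    return 0;
}

Because the lock is only taken once per block rather than once per allocation, the common case is lock-free, which is why the commit introduces allocateLocal() alongside the lock-protected allocate().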
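The new dirty_MUT_VAR() barrier above implements a clean/dirty scheme: a MUT_VAR starts out (and is reset by the collector to) MUT_VAR_CLEAN, and only the first write between collections pays the cost of putting it on a mutable list. Here is a simplified standalone sketch of the same idea; the names Obj, CLEAN, DIRTY, remembered_set and write_ref are invented stand-ins for the closure info pointer and the per-capability mut_lists, and the generation check (bd->gen_no > 0) is omitted.

/* Toy clean/dirty write barrier in the spirit of dirty_MUT_VAR(). */
#include <stdio.h>

enum state { CLEAN, DIRTY };

typedef struct Obj_ {
    enum state   state;        /* plays the role of the info pointer   */
    struct Obj_ *payload;      /* the mutable reference's contents     */
    struct Obj_ *remembered;   /* link onto the remembered set         */
} Obj;

static Obj *remembered_set = NULL;   /* scanned by the next minor GC   */

/* Write barrier: only the first write after a collection pushes the
 * object onto the remembered set; subsequent writes are just a store. */
static void write_ref(Obj *ref, Obj *val)
{
    if (ref->state == CLEAN) {
        ref->state = DIRTY;
        ref->remembered = remembered_set;
        remembered_set = ref;
    }
    ref->payload = val;
}

int main(void)
{
    Obj a = { CLEAN, NULL, NULL }, b = { CLEAN, NULL, NULL };
    write_ref(&a, &b);         /* first write: a goes on the set       */
    write_ref(&a, NULL);       /* later writes: no list traffic        */
    printf("remembered set head: %p\n", (void *)remembered_set);
    return 0;
}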