X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FStorage.c;h=0bc15c075e6d8732843163acd84bcdb1504dcd1c;hb=1231d33279f25e931bcac1bac046b0f839290d61;hp=d8381e04a37636bb270580365398f0696467e296;hpb=6c278790358dc723fe574a593203993be46ab2fb;p=ghc-hetmet.git diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index d8381e0..0bc15c0 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -29,6 +29,8 @@ #include "RetainerProfile.h" // for counting memory blocks (memInventory) #include "OSMem.h" #include "Trace.h" +#include "GC.h" +#include "GCUtils.h" #include #include @@ -40,14 +42,10 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *small_alloc_list; /* allocate()d small objects */ bdescr *pinned_object_block; /* allocate pinned objects into this block */ nat alloc_blocks; /* number of allocate()d blocks since GC */ nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ -StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */ -StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */ - generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ @@ -88,20 +86,16 @@ initStep (step *stp, int g, int s) stp->n_old_blocks = 0; stp->gen = &generations[g]; stp->gen_no = g; - stp->hp = NULL; - stp->hpLim = NULL; - stp->hp_bd = NULL; - stp->scavd_hp = NULL; - stp->scavd_hpLim = NULL; - stp->scan = NULL; - stp->scan_bd = NULL; stp->large_objects = NULL; stp->n_large_blocks = 0; - stp->new_large_objects = NULL; stp->scavenged_large_objects = NULL; stp->n_scavenged_large_blocks = 0; stp->is_compacted = 0; stp->bitmap = NULL; +#ifdef THREADED_RTS + initSpinLock(&stp->sync_todo); + initSpinLock(&stp->sync_large_objects); +#endif } void @@ -109,6 +103,7 @@ initStorage( void ) { nat g, s; generation *gen; + step *step_arr; if (generations != NULL) { // multi-init protection @@ 
-120,7 +115,8 @@ initStorage( void ) /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be * doing something reasonable. */ - ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info)); + /* We use the NOT_NULL variant or gcc warns that the test is always true */ + ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure)); ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure)); @@ -151,6 +147,15 @@ initStorage( void ) * sizeof(struct generation_), "initStorage: gens"); + /* allocate all the steps into an array. It is important that we do + it this way, because we need the invariant that two step pointers + can be directly compared to see which is the oldest. + Remember that the last generation has only one step. */ + step_arr = stgMallocBytes(sizeof(struct step_) + * (1 + ((RtsFlags.GcFlags.generations - 1) + * RtsFlags.GcFlags.steps)), + "initStorage: steps"); + /* Initialise all generations */ for(g = 0; g < RtsFlags.GcFlags.generations; g++) { gen = &generations[g]; @@ -171,31 +176,28 @@ initStorage( void ) /* Oldest generation: one step */ oldest_gen->n_steps = 1; - oldest_gen->steps = - stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step"); + oldest_gen->steps = step_arr + (RtsFlags.GcFlags.generations - 1) + * RtsFlags.GcFlags.steps; /* set up all except the oldest generation with 2 steps */ for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) { generations[g].n_steps = RtsFlags.GcFlags.steps; - generations[g].steps = - stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_), - "initStorage: steps"); + generations[g].steps = step_arr + g * RtsFlags.GcFlags.steps; } } else { /* single generation, i.e. 
a two-space collector */ g0->n_steps = 1; - g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps"); + g0->steps = step_arr; } #ifdef THREADED_RTS n_nurseries = n_capabilities; - nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), - "initStorage: nurseries"); #else n_nurseries = 1; - nurseries = g0->steps; // just share nurseries[0] with g0s0 -#endif +#endif + nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), + "initStorage: nurseries"); /* Initialise all steps */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) { @@ -204,11 +206,9 @@ initStorage( void ) } } -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { initStep(&nurseries[s], 0, s); } -#endif /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { @@ -219,11 +219,9 @@ initStorage( void ) } oldest_gen->steps[0].to = &oldest_gen->steps[0]; -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { nurseries[s].to = generations[0].steps[0].to; } -#endif /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact) { @@ -234,24 +232,15 @@ initStorage( void ) } } -#ifdef THREADED_RTS - if (RtsFlags.GcFlags.generations == 1) { - errorBelch("-G1 is incompatible with -threaded"); - stg_exit(EXIT_FAILURE); - } -#endif - - /* generation 0 is special: that's the nursery */ generations[0].max_blocks = 0; + g0s0 = &generations[0].steps[0]; - /* G0S0: the allocation area. Policy: keep the allocation area + /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap * size. Reason: we're going to do a major collection first, and we * don't want it to be a big one. This vague idea is borne out by * rigorous experimental evidence. 
*/ - g0s0 = &generations[0].steps[0]; - allocNurseries(); weak_ptr_list = NULL; @@ -259,13 +248,16 @@ initStorage( void ) revertible_caf_list = NULL; /* initialise the allocate() interface */ - small_alloc_list = NULL; alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; /* Tell GNU multi-precision pkg about our custom alloc functions */ mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP); +#ifdef THREADED_RTS + initSpinLock(&gc_alloc_block_sync); +#endif + IF_DEBUG(gc, statDescribeGens()); RELEASE_SM_LOCK; @@ -280,17 +272,14 @@ exitStorage (void) void freeStorage (void) { - nat g; - - for(g = 0; g < RtsFlags.GcFlags.generations; g++) - stgFree(generations[g].steps); + stgFree(g0s0); // frees all the steps stgFree(generations); freeAllMBlocks(); #if defined(THREADED_RTS) closeMutex(&sm_mutex); closeMutex(&atomic_modify_mutvar_mutex); - stgFree(nurseries); #endif + stgFree(nurseries); } /* ----------------------------------------------------------------------------- @@ -559,57 +548,75 @@ resizeNurseries (nat blocks) /* ----------------------------------------------------------------------------- The allocate() interface - allocate(n) always succeeds, and returns a chunk of memory n words - long. n can be larger than the size of a block if necessary, in - which case a contiguous block group will be allocated. + allocateInGen() function allocates memory directly into a specific + generation. It always succeeds, and returns a chunk of memory n + words long. n can be larger than the size of a block if necessary, + in which case a contiguous block group will be allocated. + + allocate(n) is equivalent to allocateInGen(g0, n). 
-------------------------------------------------------------------------- */ StgPtr -allocate( nat n ) +allocateInGen (generation *g, nat n) { + step *stp; bdescr *bd; - StgPtr p; + StgPtr ret; ACQUIRE_SM_LOCK; - + TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + stp = &g->steps[0]; + + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) + { nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + + // Attempting to allocate an object larger than maxHeapSize + // should definitely be disallowed. (bug #1791) + if (RtsFlags.GcFlags.maxHeapSize > 0 && + req_blocks >= RtsFlags.GcFlags.maxHeapSize) { + heapOverflow(); + } + bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += bd->blocks; // might be larger than req_blocks - bd->gen_no = 0; - bd->step = g0s0; + dbl_link_onto(bd, &stp->large_objects); + stp->n_large_blocks += bd->blocks; // might be larger than req_blocks + bd->gen_no = g->no; + bd->step = stp; bd->flags = BF_LARGE; bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; - } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; + ret = bd->start; } - - p = alloc_Hp; - alloc_Hp += n; + else + { + // small allocation (blocks; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + bd = allocBlock(); + bd->gen_no = g->no; + bd->step = stp; + bd->flags = 0; + bd->link = stp->blocks; + stp->blocks = bd; + stp->n_blocks++; + alloc_blocks++; + } + ret = bd->free; + bd->free += n; + } + RELEASE_SM_LOCK; - return p; + + return ret; +} + +StgPtr +allocate (nat n) +{ + 
return allocateInGen(g0,n); } lnat @@ -617,7 +624,7 @@ allocatedBytes( void ) { lnat allocated; - allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp); + allocated = alloc_blocks * BLOCK_SIZE_W; if (pinned_object_block != NULL) { allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - pinned_object_block->free; @@ -626,16 +633,6 @@ allocatedBytes( void ) return allocated; } -void -tidyAllocateLists (void) -{ - if (small_alloc_list != NULL) { - ASSERT(alloc_Hp >= small_alloc_list->start && - alloc_Hp <= small_alloc_list->start + BLOCK_SIZE); - small_alloc_list->free = alloc_Hp; - } -} - /* ----------------------------------------------------------------------------- allocateLocal() @@ -655,60 +652,48 @@ allocateLocal (Capability *cap, nat n) bdescr *bd; StgPtr p; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - ACQUIRE_SM_LOCK; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += bd->blocks; // might be larger than req_blocks - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation (r.rCurrentAlloc; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + /* small allocation (r.rCurrentNursery->link; - - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - // The nursery is empty, or the next block is already - // full: allocate a fresh block (we can't fail here). 
- ACQUIRE_SM_LOCK; - bd = allocBlock(); - cap->r.rNursery->n_blocks++; - RELEASE_SM_LOCK; - bd->gen_no = 0; - bd->step = cap->r.rNursery; - bd->flags = 0; - alloc_blocks++; - } else { - // we have a block in the nursery: take it and put - // it at the *front* of the nursery list, and use it - // to allocate() from. - cap->r.rCurrentNursery->link = bd->link; - if (bd->link != NULL) { - bd->link->u.back = cap->r.rCurrentNursery; - } - } - dbl_link_onto(bd, &cap->r.rNursery->blocks); - cap->r.rCurrentAlloc = bd; - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); - } + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + bd = cap->r.rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + cap->r.rNursery->n_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = cap->r.rNursery; + bd->flags = 0; + // NO: alloc_blocks++; + // calcAllocated() uses the size of the nursery, and we've + // already bumped nursery->n_blocks above. + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + cap->r.rCurrentNursery->link = bd->link; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + dbl_link_onto(bd, &cap->r.rNursery->blocks); + cap->r.rCurrentAlloc = bd; + IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); } p = bd->free; bd->free += n; @@ -933,15 +918,15 @@ calcAllocated( void ) /* Approximate the amount of live data in the heap. To be called just * after garbage collection (see GarbageCollect()). 
*/ -extern lnat -calcLive(void) +lnat +calcLiveBlocks(void) { nat g, s; lnat live = 0; step *stp; if (RtsFlags.GcFlags.generations == 1) { - return (g0s0->n_large_blocks + g0s0->n_blocks) * BLOCK_SIZE_W; + return g0s0->n_large_blocks + g0s0->n_blocks; } for (g = 0; g < RtsFlags.GcFlags.generations; g++) { @@ -953,12 +938,49 @@ calcLive(void) continue; } stp = &generations[g].steps[s]; - live += (stp->n_large_blocks + stp->n_blocks) * BLOCK_SIZE_W; + live += stp->n_large_blocks + stp->n_blocks; } } return live; } +lnat +countOccupied(bdescr *bd) +{ + lnat words; + + words = 0; + for (; bd != NULL; bd = bd->link) { + words += bd->free - bd->start; + } + return words; +} + +// Return an accurate count of the live data in the heap, excluding +// g0s0 (step 0 of generation 0) unless there is only one generation. +lnat +calcLiveWords(void) +{ + nat g, s; + lnat live; + step *stp; + + if (RtsFlags.GcFlags.generations == 1) { + return countOccupied(g0s0->blocks) + countOccupied(g0s0->large_objects); + } + + live = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + if (g == 0 && s == 0) continue; + stp = &generations[g].steps[s]; + live += countOccupied(stp->blocks) + + countOccupied(stp->large_objects); + } + } + return live; +} + /* Approximate the number of blocks that will be needed at the next * garbage collection. 
* @@ -981,9 +1003,9 @@ calcNeeded(void) generations[g].steps[0].n_large_blocks > generations[g].max_blocks && stp->is_compacted == 0) { - needed += 2 * stp->n_blocks; + needed += 2 * stp->n_blocks + stp->n_large_blocks; } else { - needed += stp->n_blocks; + needed += stp->n_blocks + stp->n_large_blocks; } } } @@ -1095,6 +1117,21 @@ void freeExec (void *addr) #ifdef DEBUG +// Useful for finding partially full blocks in gdb +void findSlop(bdescr *bd); +void findSlop(bdescr *bd) +{ + lnat slop; + + for (; bd != NULL; bd = bd->link) { + slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start); + if (slop > (1024/sizeof(W_))) { + debugBelch("block at %p (bdescr %p) has %ldKB slop\n", + bd->start, bd, slop / (1024/sizeof(W_))); + } + } +} + nat countBlocks(bdescr *bd) { @@ -1140,7 +1177,7 @@ memInventory(void) nat g, s, i; step *stp; lnat gen_blocks[RtsFlags.GcFlags.generations]; - lnat nursery_blocks, allocate_blocks, retainer_blocks, + lnat nursery_blocks, retainer_blocks, arena_blocks, exec_blocks; lnat live_blocks = 0, free_blocks = 0; @@ -1153,11 +1190,6 @@ memInventory(void) } gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); for (s = 0; s < generations[g].n_steps; s++) { -#if !defined(THREADED_RTS) - // We put pinned object blocks in g0s0, so better count - // blocks there too. 
- if (g==0 && s==0) continue; -#endif stp = &generations[g].steps[s]; gen_blocks[g] += stepBlocks(stp); } @@ -1168,9 +1200,6 @@ memInventory(void) nursery_blocks += stepBlocks(&nurseries[i]); } - /* any blocks held by allocate() */ - allocate_blocks = countAllocdBlocks(small_alloc_list); - retainer_blocks = 0; #ifdef PROFILING if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { @@ -1191,7 +1220,7 @@ memInventory(void) for (g = 0; g < RtsFlags.GcFlags.generations; g++) { live_blocks += gen_blocks[g]; } - live_blocks += nursery_blocks + allocate_blocks + live_blocks += nursery_blocks + + retainer_blocks + arena_blocks + exec_blocks; if (live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK) @@ -1201,7 +1230,6 @@ memInventory(void) debugBelch(" gen %d blocks : %4lu\n", g, gen_blocks[g]); } debugBelch(" nursery : %4lu\n", nursery_blocks); - debugBelch(" allocate() : %4lu\n", allocate_blocks); debugBelch(" retainer : %4lu\n", retainer_blocks); debugBelch(" arena blocks : %4lu\n", arena_blocks); debugBelch(" exec : %4lu\n", exec_blocks);