X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FStorage.c;h=3ede82d14157bc4d6e2e223f61c0f54a095e7e85;hb=89eac8928317774fdc3f283d78d3ff3cb315db5e;hp=d131da9e1ef3791485cc8b210940f7dea066165a;hpb=cb3cb473854e815784375ad23cc5081621a95ce8;p=ghc-hetmet.git diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index d131da9..3ede82d 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -1,9 +1,14 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team, 1998-2004 + * (c) The GHC Team, 1998-2006 * * Storage manager front end * + * Documentation on the architecture of the Storage Manager can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage + * * ---------------------------------------------------------------------------*/ #include "PosixSource.h" @@ -35,14 +40,10 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *small_alloc_list; /* allocate()d small objects */ bdescr *pinned_object_block; /* allocate pinned objects into this block */ nat alloc_blocks; /* number of allocate()d blocks since GC */ nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ -StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */ -StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */ - generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ @@ -110,10 +111,13 @@ initStorage( void ) return; } + initMBlocks(); + /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be * doing something reasonable. */ - ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info)); + /* We use the NOT_NULL variant or gcc warns that the test is always true */ + ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure)); ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure)); @@ -183,12 +187,11 @@ initStorage( void ) #ifdef THREADED_RTS n_nurseries = n_capabilities; - nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), - "initStorage: nurseries"); #else n_nurseries = 1; - nurseries = g0->steps; // just share nurseries[0] with g0s0 -#endif +#endif + nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), + "initStorage: nurseries"); /* Initialise all steps */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) { @@ -197,11 +200,9 @@ initStorage( void ) } } -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { initStep(&nurseries[s], 0, s); } -#endif /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { @@ -212,11 +213,9 @@ initStorage( void ) } oldest_gen->steps[0].to = &oldest_gen->steps[0]; -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { nurseries[s].to = generations[0].steps[0].to; } -#endif /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact) { @@ -227,24 +226,15 @@ initStorage( void ) } } -#ifdef THREADED_RTS - if (RtsFlags.GcFlags.generations == 1) { - errorBelch("-G1 is incompatible with -threaded"); - stg_exit(EXIT_FAILURE); - } -#endif - - /* generation 0 is special: that's the nursery */ generations[0].max_blocks = 0; + g0s0 = &generations[0].steps[0]; - /* G0S0: the allocation area. Policy: keep the allocation area + /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap * size. Reason: we're going to do a major collection first, and we * don't want it to be a big one. This vague idea is borne out by * rigorous experimental evidence. */ - g0s0 = &generations[0].steps[0]; - allocNurseries(); weak_ptr_list = NULL; @@ -252,7 +242,6 @@ initStorage( void ) revertible_caf_list = NULL; /* initialise the allocate() interface */ - small_alloc_list = NULL; alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; @@ -283,6 +272,7 @@ freeStorage (void) closeMutex(&sm_mutex); closeMutex(&atomic_modify_mutvar_mutex); #endif + stgFree(nurseries); } /* ----------------------------------------------------------------------------- @@ -450,7 +440,6 @@ allocNurseries( void ) nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize; nurseries[i].old_blocks = NULL; nurseries[i].n_old_blocks = 0; - /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */ } assignNurseriesToCapabilities(); } @@ -552,57 +541,75 @@ resizeNurseries (nat blocks) /* ----------------------------------------------------------------------------- The allocate() interface - allocate(n) always succeeds, and returns a chunk of memory n words - long. n can be larger than the size of a block if necessary, in - which case a contiguous block group will be allocated. + allocateInGen() function allocates memory directly into a specific + generation. It always succeeds, and returns a chunk of memory n + words long. n can be larger than the size of a block if necessary, + in which case a contiguous block group will be allocated. + + allocate(n) is equivalent to allocateInGen(g0). -------------------------------------------------------------------------- */ StgPtr -allocate( nat n ) +allocateInGen (generation *g, nat n) { + step *stp; bdescr *bd; - StgPtr p; + StgPtr ret; ACQUIRE_SM_LOCK; - + TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + stp = &g->steps[0]; + + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) + { nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + + // Attempting to allocate an object larger than maxHeapSize + // should definitely be disallowed. (bug #1791) + if (RtsFlags.GcFlags.maxHeapSize > 0 && + req_blocks >= RtsFlags.GcFlags.maxHeapSize) { + heapOverflow(); + } + bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += req_blocks; - bd->gen_no = 0; - bd->step = g0s0; + dbl_link_onto(bd, &stp->large_objects); + stp->n_large_blocks += bd->blocks; // might be larger than req_blocks + bd->gen_no = g->no; + bd->step = stp; bd->flags = BF_LARGE; bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; - } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; + ret = bd->start; } - - p = alloc_Hp; - alloc_Hp += n; + else + { + // small allocation (blocks; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + bd = allocBlock(); + bd->gen_no = g->no; + bd->step = stp; + bd->flags = 0; + bd->link = stp->blocks; + stp->blocks = bd; + stp->n_blocks++; + alloc_blocks++; + } + ret = bd->free; + bd->free += n; + } + RELEASE_SM_LOCK; - return p; + + return ret; +} + +StgPtr +allocate (nat n) +{ + return allocateInGen(g0,n); } lnat @@ -610,7 +617,7 @@ allocatedBytes( void ) { lnat allocated; - allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp); + allocated = alloc_blocks * BLOCK_SIZE_W; if (pinned_object_block != NULL) { allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - pinned_object_block->free; @@ -619,16 +626,6 @@ allocatedBytes( void ) return allocated; } -void -tidyAllocateLists (void) -{ - if (small_alloc_list != NULL) { - ASSERT(alloc_Hp >= small_alloc_list->start && - alloc_Hp <= small_alloc_list->start + BLOCK_SIZE); - small_alloc_list->free = alloc_Hp; - } -} - /* ----------------------------------------------------------------------------- allocateLocal() @@ -648,59 +645,48 @@ allocateLocal (Capability *cap, nat n) bdescr *bd; StgPtr p; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - ACQUIRE_SM_LOCK; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += req_blocks; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation (r.rCurrentAlloc; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + /* small allocation (r.rCurrentNursery->link; - - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - // The nursery is empty, or the next block is already - // full: allocate a fresh block (we can't fail here). - ACQUIRE_SM_LOCK; - bd = allocBlock(); - cap->r.rNursery->n_blocks++; - RELEASE_SM_LOCK; - bd->gen_no = 0; - bd->step = cap->r.rNursery; - bd->flags = 0; - } else { - // we have a block in the nursery: take it and put - // it at the *front* of the nursery list, and use it - // to allocate() from. - cap->r.rCurrentNursery->link = bd->link; - if (bd->link != NULL) { - bd->link->u.back = cap->r.rCurrentNursery; - } - } - dbl_link_onto(bd, &cap->r.rNursery->blocks); - cap->r.rCurrentAlloc = bd; - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); - } + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + bd = cap->r.rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + cap->r.rNursery->n_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = cap->r.rNursery; + bd->flags = 0; + // NO: alloc_blocks++; + // calcAllocated() uses the size of the nursery, and we've + // already bumpted nursery->n_blocks above. + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + cap->r.rCurrentNursery->link = bd->link; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + dbl_link_onto(bd, &cap->r.rNursery->blocks); + cap->r.rCurrentAlloc = bd; + IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); } p = bd->free; bd->free += n; @@ -759,6 +745,7 @@ allocatePinned( nat n ) if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { pinned_object_block = bd = allocBlock(); dbl_link_onto(bd, &g0s0->large_objects); + g0s0->n_large_blocks++; bd->gen_no = 0; bd->step = g0s0; bd->flags = BF_PINNED | BF_LARGE; @@ -773,12 +760,15 @@ allocatePinned( nat n ) } /* ----------------------------------------------------------------------------- + Write Barriers + -------------------------------------------------------------------------- */ + +/* This is the write barrier for MUT_VARs, a.k.a. IORefs. A MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY and is put on the mutable list. - -------------------------------------------------------------------------- */ - +*/ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) { @@ -791,6 +781,23 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) } } +/* + This is the write barrier for MVARs. An MVAR_CLEAN objects is not + on the mutable list; a MVAR_DIRTY is. When written to, a + MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list. + The check for MVAR_CLEAN is inlined at the call site for speed, + this really does make a difference on concurrency-heavy benchmarks + such as Chaneneos and cheap-concurrency. +*/ +void +dirty_MVAR(StgRegTable *reg, StgClosure *p) +{ + Capability *cap = regTableToCapability(reg); + bdescr *bd; + bd = Bdescr((StgPtr)p); + if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no); +} + /* ----------------------------------------------------------------------------- Allocation functions for GMP. @@ -912,9 +919,7 @@ calcLive(void) step *stp; if (RtsFlags.GcFlags.generations == 1) { - live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W + - ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_); - return live; + return (g0s0->n_large_blocks + g0s0->n_blocks) * BLOCK_SIZE_W; } for (g = 0; g < RtsFlags.GcFlags.generations; g++) { @@ -926,14 +931,7 @@ calcLive(void) continue; } stp = &generations[g].steps[s]; - live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W; - if (stp->hp_bd != NULL) { - live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start) - / sizeof(W_); - } - if (stp->scavd_hp != NULL) { - live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp; - } + live += (stp->n_large_blocks + stp->n_blocks) * BLOCK_SIZE_W; } } return live; @@ -983,6 +981,11 @@ calcNeeded(void) in the page, and when the page is emptied (all objects on the page are free) we free the page again, not forgetting to make it non-executable. + + TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that + the linker cannot use allocateExec for loading object code files + on Windows. Once allocateExec can handle larger objects, the linker + should be modified to use allocateExec instead of VirtualAlloc. ------------------------------------------------------------------------- */ static bdescr *exec_block; @@ -1044,20 +1047,17 @@ void freeExec (void *addr) bd->gen_no -= *(StgPtr)p; *(StgPtr)p = 0; - // Free the block if it is empty, but not if it is the block at - // the head of the queue. - if (bd->gen_no == 0 && bd != exec_block) { - debugTrace(DEBUG_gc, "free exec block %p", bd->start); - if (bd->u.back) { - bd->u.back->link = bd->link; - } else { - exec_block = bd->link; - } - if (bd->link) { - bd->link->u.back = bd->u.back; - } - setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); - freeGroup(bd); + if (bd->gen_no == 0) { + // Free the block if it is empty, but not if it is the block at + // the head of the queue. + if (bd != exec_block) { + debugTrace(DEBUG_gc, "free exec block %p", bd->start); + dbl_link_remove(bd, &exec_block); + setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); + freeGroup(bd); + } else { + bd->free = bd->start; + } } RELEASE_SM_LOCK @@ -1073,26 +1073,43 @@ void freeExec (void *addr) #ifdef DEBUG -static lnat -stepBlocks (step *stp) +nat +countBlocks(bdescr *bd) { - lnat total_blocks; - bdescr *bd; + nat n; + for (n=0; bd != NULL; bd=bd->link) { + n += bd->blocks; + } + return n; +} - total_blocks = stp->n_blocks; - total_blocks += stp->n_old_blocks; - for (bd = stp->large_objects; bd; bd = bd->link) { - total_blocks += bd->blocks; - /* hack for megablock groups: they have an extra block or two in - the second and subsequent megablocks where the block - descriptors would normally go. - */ +// (*1) Just like countBlocks, except that we adjust the count for a +// megablock group so that it doesn't include the extra few blocks +// that would be taken up by block descriptors in the second and +// subsequent megablock. This is so we can tally the count with the +// number of blocks allocated in the system, for memInventory(). +static nat +countAllocdBlocks(bdescr *bd) +{ + nat n; + for (n=0; bd != NULL; bd=bd->link) { + n += bd->blocks; + // hack for megablock groups: see (*1) above if (bd->blocks > BLOCKS_PER_MBLOCK) { - total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK) + n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK) * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE)); } } - return total_blocks; + return n; +} + +static lnat +stepBlocks (step *stp) +{ + ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); + return stp->n_blocks + stp->n_old_blocks + + countAllocdBlocks(stp->large_objects); } void @@ -1100,78 +1117,71 @@ memInventory(void) { nat g, s, i; step *stp; - bdescr *bd; - lnat total_blocks = 0, free_blocks = 0; + lnat gen_blocks[RtsFlags.GcFlags.generations]; + lnat nursery_blocks, retainer_blocks, + arena_blocks, exec_blocks; + lnat live_blocks = 0, free_blocks = 0; - /* count the blocks we current have */ + // count the blocks we current have for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + gen_blocks[g] = 0; for (i = 0; i < n_capabilities; i++) { - for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) { - total_blocks += bd->blocks; - } + gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]); } - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - total_blocks += bd->blocks; - } + gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); for (s = 0; s < generations[g].n_steps; s++) { - if (g==0 && s==0) continue; stp = &generations[g].steps[s]; - total_blocks += stepBlocks(stp); + gen_blocks[g] += stepBlocks(stp); } } + nursery_blocks = 0; for (i = 0; i < n_nurseries; i++) { - total_blocks += stepBlocks(&nurseries[i]); - } -#ifdef THREADED_RTS - // We put pinned object blocks in g0s0, so better count blocks there too. - total_blocks += stepBlocks(g0s0); -#endif - - /* any blocks held by allocate() */ - for (bd = small_alloc_list; bd; bd = bd->link) { - total_blocks += bd->blocks; + nursery_blocks += stepBlocks(&nurseries[i]); } + retainer_blocks = 0; #ifdef PROFILING if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { - total_blocks += retainerStackBlocks(); + retainer_blocks = retainerStackBlocks(); } #endif // count the blocks allocated by the arena allocator - total_blocks += arenaBlocks(); + arena_blocks = arenaBlocks(); // count the blocks containing executable memory - for (bd = exec_block; bd; bd = bd->link) { - total_blocks += bd->blocks; - } + exec_blocks = countAllocdBlocks(exec_block); /* count the blocks on the free list */ free_blocks = countFreeList(); - if (total_blocks + free_blocks != mblocks_allocated * - BLOCKS_PER_MBLOCK) { - debugBelch("Blocks: %ld live + %ld free = %ld total (%ld around)\n", - total_blocks, free_blocks, total_blocks + free_blocks, - mblocks_allocated * BLOCKS_PER_MBLOCK); + live_blocks = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + live_blocks += gen_blocks[g]; } + live_blocks += nursery_blocks + + + retainer_blocks + arena_blocks + exec_blocks; - ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK); + if (live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK) + { + debugBelch("Memory leak detected\n"); + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + debugBelch(" gen %d blocks : %4lu\n", g, gen_blocks[g]); + } + debugBelch(" nursery : %4lu\n", nursery_blocks); + debugBelch(" retainer : %4lu\n", retainer_blocks); + debugBelch(" arena blocks : %4lu\n", arena_blocks); + debugBelch(" exec : %4lu\n", exec_blocks); + debugBelch(" free : %4lu\n", free_blocks); + debugBelch(" total : %4lu\n\n", live_blocks + free_blocks); + debugBelch(" in system : %4lu\n", mblocks_allocated * BLOCKS_PER_MBLOCK); + ASSERT(0); + } } -nat -countBlocks(bdescr *bd) -{ - nat n; - for (n=0; bd != NULL; bd=bd->link) { - n += bd->blocks; - } - return n; -} - /* Full heap sanity check. */ void checkSanity( void )