From c1540c3def6281ea67b5a4ce7499b1aa982faa25 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Fri, 12 Oct 2007 12:44:13 +0000 Subject: [PATCH] Add allocateInGen() for allocating in a specific generation, and cleanups Now allocate() is a synonym for allocateInGen(). I also made various cleanups: there is now less special-case code for supporting -G1 (two-space collection), and -G1 now works with -threaded. --- includes/Storage.h | 12 ++- rts/LdvProfile.c | 2 +- rts/ProfHeap.c | 7 -- rts/RetainerProfile.c | 2 +- rts/sm/GC.c | 43 ++-------- rts/sm/Storage.c | 224 ++++++++++++++++++++----------------------------- 6 files changed, 110 insertions(+), 180 deletions(-) diff --git a/includes/Storage.h b/includes/Storage.h index 92a856c..9257bcd 100644 --- a/includes/Storage.h +++ b/includes/Storage.h @@ -114,9 +114,13 @@ extern void freeStorage(void); /* ----------------------------------------------------------------------------- Generic allocation - StgPtr allocate(nat n) Allocates a chunk of contiguous store - n words long, returning a pointer to - the first word. Always succeeds. + StgPtr allocateInGen(generation *g, nat n) + Allocates a chunk of contiguous store + n words long in generation g, + returning a pointer to the first word. + Always succeeds. + + StgPtr allocate(nat n) Equaivalent to allocateInGen(g0) StgPtr allocateLocal(Capability *cap, nat n) Allocates memory from the nursery in @@ -150,6 +154,7 @@ extern void freeStorage(void); -------------------------------------------------------------------------- */ extern StgPtr allocate ( nat n ); +extern StgPtr allocateInGen ( generation *g, nat n ); extern StgPtr allocateLocal ( Capability *cap, nat n ); extern StgPtr allocatePinned ( nat n ); extern lnat allocatedBytes ( void ); @@ -476,7 +481,6 @@ extern void allocNurseries ( void ); extern void resetNurseries ( void ); extern void resizeNurseries ( nat blocks ); extern void resizeNurseriesFixed ( nat blocks ); -extern void tidyAllocateLists ( void ); extern lnat countNurseryBlocks ( void ); /* ----------------------------------------------------------------------------- diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c index ecbba8b..1838649 100644 --- a/rts/LdvProfile.c +++ b/rts/LdvProfile.c @@ -247,7 +247,7 @@ processSmallObjectPoolForDead( void ) bdescr *bd; StgPtr p; - bd = small_alloc_list; + bd = g0s0->blocks; // first block if (bd == NULL) diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 08597b1..599b479 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -1151,13 +1151,6 @@ heapCensus( void ) #endif // Traverse the heap, collecting the census info - - // First the small_alloc_list: we have to fix the free pointer at - // the end by calling tidyAllocatedLists() first. - tidyAllocateLists(); - heapCensusChain( census, small_alloc_list ); - - // Now traverse the heap in each generation/step. if (RtsFlags.GcFlags.generations == 1) { heapCensusChain( census, g0s0->blocks ); } else { diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 745b8e7..e963567 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -2170,7 +2170,7 @@ smallObjectPoolCheck(void) StgPtr p; static nat costSum, size; - bd = small_alloc_list; + bd = g0s0->blocks; costSum = 0; // first block diff --git a/rts/sm/GC.c b/rts/sm/GC.c index b5f4bcd..4aa210c 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -109,11 +109,6 @@ rtsBool eager_promotion; */ rtsBool failed_to_evac; -/* Saved nursery (used for 2-space collector only) - */ -static bdescr *saved_nursery; -static nat saved_n_blocks; - /* Data used for allocation area sizing. */ lnat new_blocks; // blocks allocated during this GC @@ -274,17 +269,6 @@ GarbageCollect ( rtsBool force_major_gc ) static_objects = END_OF_STATIC_LIST; scavenged_static_objects = END_OF_STATIC_LIST; - /* Save the nursery if we're doing a two-space collection. - * g0s0->blocks will be used for to-space, so we need to get the - * nursery out of the way. - */ - if (RtsFlags.GcFlags.generations == 1) { - saved_nursery = g0s0->blocks; - saved_n_blocks = g0s0->n_blocks; - g0s0->blocks = NULL; - g0s0->n_blocks = 0; - } - /* Keep a count of how many new blocks we allocated during this GC * (used for resizing the allocation area, later). */ @@ -663,7 +647,7 @@ GarbageCollect ( rtsBool force_major_gc ) * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. */ - if (!(g == 0 && s == 0)) { + if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { if (stp->is_compacted) { // for a compacted step, just shift the new to-space // onto the front of the now-compacted existing blocks. @@ -817,13 +801,14 @@ GarbageCollect ( rtsBool force_major_gc ) /* Free the small objects allocated via allocate(), since this will * all have been copied into G0S1 now. */ - if (small_alloc_list != NULL) { - freeChain(small_alloc_list); + if (RtsFlags.GcFlags.generations > 1) { + if (g0s0->blocks != NULL) { + freeChain(g0s0->blocks); + g0s0->blocks = NULL; + } + g0s0->n_blocks = 0; } - small_alloc_list = NULL; alloc_blocks = 0; - alloc_Hp = NULL; - alloc_HpLim = NULL; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; // Start a new pinned_object_block @@ -853,17 +838,6 @@ GarbageCollect ( rtsBool force_major_gc ) if (RtsFlags.GcFlags.generations == 1) { nat blocks; - if (g0s0->old_blocks != NULL) { - freeChain(g0s0->old_blocks); - } - for (bd = g0s0->blocks; bd != NULL; bd = bd->link) { - bd->flags = 0; // now from-space - } - g0s0->old_blocks = g0s0->blocks; - g0s0->n_old_blocks = g0s0->n_blocks; - g0s0->blocks = saved_nursery; - g0s0->n_blocks = saved_n_blocks; - /* For a two-space collector, we need to resize the nursery. */ /* set up a new nursery. Allocate a nursery size based on a @@ -880,7 +854,7 @@ GarbageCollect ( rtsBool force_major_gc ) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = g0s0->n_old_blocks; + blocks = g0s0->n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 > @@ -1042,6 +1016,7 @@ isAlive(StgClosure *p) const StgInfoTable *info; bdescr *bd; StgWord tag; + StgClosure *q; while (1) { /* The tag and the pointer are split, to be merged later when needed. */ diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index d8381e0..eb29b2d 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -40,14 +40,10 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *small_alloc_list; /* allocate()d small objects */ bdescr *pinned_object_block; /* allocate pinned objects into this block */ nat alloc_blocks; /* number of allocate()d blocks since GC */ nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ -StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */ -StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */ - generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ @@ -190,12 +186,11 @@ initStorage( void ) #ifdef THREADED_RTS n_nurseries = n_capabilities; - nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), - "initStorage: nurseries"); #else n_nurseries = 1; - nurseries = g0->steps; // just share nurseries[0] with g0s0 -#endif +#endif + nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), + "initStorage: nurseries"); /* Initialise all steps */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) { @@ -204,11 +199,9 @@ initStorage( void ) } } -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { initStep(&nurseries[s], 0, s); } -#endif /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { @@ -219,11 +212,9 @@ initStorage( void ) } oldest_gen->steps[0].to = &oldest_gen->steps[0]; -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { nurseries[s].to = generations[0].steps[0].to; } -#endif /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact) { @@ -234,24 +225,15 @@ initStorage( void ) } } -#ifdef THREADED_RTS - if (RtsFlags.GcFlags.generations == 1) { - errorBelch("-G1 is incompatible with -threaded"); - stg_exit(EXIT_FAILURE); - } -#endif - - /* generation 0 is special: that's the nursery */ generations[0].max_blocks = 0; + g0s0 = &generations[0].steps[0]; - /* G0S0: the allocation area. Policy: keep the allocation area + /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap * size. Reason: we're going to do a major collection first, and we * don't want it to be a big one. This vague idea is borne out by * rigorous experimental evidence. */ - g0s0 = &generations[0].steps[0]; - allocNurseries(); weak_ptr_list = NULL; @@ -259,7 +241,6 @@ initStorage( void ) revertible_caf_list = NULL; /* initialise the allocate() interface */ - small_alloc_list = NULL; alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; @@ -289,8 +270,8 @@ freeStorage (void) #if defined(THREADED_RTS) closeMutex(&sm_mutex); closeMutex(&atomic_modify_mutvar_mutex); - stgFree(nurseries); #endif + stgFree(nurseries); } /* ----------------------------------------------------------------------------- @@ -559,57 +540,67 @@ resizeNurseries (nat blocks) /* ----------------------------------------------------------------------------- The allocate() interface - allocate(n) always succeeds, and returns a chunk of memory n words - long. n can be larger than the size of a block if necessary, in - which case a contiguous block group will be allocated. + allocateInGen() function allocates memory directly into a specific + generation. It always succeeds, and returns a chunk of memory n + words long. n can be larger than the size of a block if necessary, + in which case a contiguous block group will be allocated. + + allocate(n) is equivalent to allocateInGen(g0). -------------------------------------------------------------------------- */ StgPtr -allocate( nat n ) +allocateInGen (generation *g, nat n) { + step *stp; bdescr *bd; - StgPtr p; + StgPtr ret; ACQUIRE_SM_LOCK; - + TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + stp = &g->steps[0]; + + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) + { nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += bd->blocks; // might be larger than req_blocks - bd->gen_no = 0; - bd->step = g0s0; + dbl_link_onto(bd, &stp->large_objects); + stp->n_large_blocks += bd->blocks; // might be larger than req_blocks + bd->gen_no = g->no; + bd->step = stp; bd->flags = BF_LARGE; bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; - } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; + ret = bd->start; } - - p = alloc_Hp; - alloc_Hp += n; + else + { + // small allocation (blocks; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + bd = allocBlock(); + bd->gen_no = g->no; + bd->step = stp; + bd->flags = 0; + bd->link = stp->blocks; + stp->blocks = bd; + stp->n_blocks++; + alloc_blocks++; + } + ret = bd->free; + bd->free += n; + } + RELEASE_SM_LOCK; - return p; + + return ret; +} + +StgPtr +allocate (nat n) +{ + return allocateInGen(g0,n); } lnat @@ -617,7 +608,7 @@ allocatedBytes( void ) { lnat allocated; - allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp); + allocated = alloc_blocks * BLOCK_SIZE_W; if (pinned_object_block != NULL) { allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - pinned_object_block->free; @@ -626,16 +617,6 @@ allocatedBytes( void ) return allocated; } -void -tidyAllocateLists (void) -{ - if (small_alloc_list != NULL) { - ASSERT(alloc_Hp >= small_alloc_list->start && - alloc_Hp <= small_alloc_list->start + BLOCK_SIZE); - small_alloc_list->free = alloc_Hp; - } -} - /* ----------------------------------------------------------------------------- allocateLocal() @@ -655,60 +636,46 @@ allocateLocal (Capability *cap, nat n) bdescr *bd; StgPtr p; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - ACQUIRE_SM_LOCK; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += bd->blocks; // might be larger than req_blocks - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation (r.rCurrentAlloc; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + /* small allocation (r.rCurrentNursery->link; - - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - // The nursery is empty, or the next block is already - // full: allocate a fresh block (we can't fail here). - ACQUIRE_SM_LOCK; - bd = allocBlock(); - cap->r.rNursery->n_blocks++; - RELEASE_SM_LOCK; - bd->gen_no = 0; - bd->step = cap->r.rNursery; - bd->flags = 0; - alloc_blocks++; - } else { - // we have a block in the nursery: take it and put - // it at the *front* of the nursery list, and use it - // to allocate() from. - cap->r.rCurrentNursery->link = bd->link; - if (bd->link != NULL) { - bd->link->u.back = cap->r.rCurrentNursery; - } - } - dbl_link_onto(bd, &cap->r.rNursery->blocks); - cap->r.rCurrentAlloc = bd; - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); - } + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + bd = cap->r.rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + cap->r.rNursery->n_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = cap->r.rNursery; + bd->flags = 0; + alloc_blocks++; + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + cap->r.rCurrentNursery->link = bd->link; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + dbl_link_onto(bd, &cap->r.rNursery->blocks); + cap->r.rCurrentAlloc = bd; + IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); } p = bd->free; bd->free += n; @@ -1140,7 +1107,7 @@ memInventory(void) nat g, s, i; step *stp; lnat gen_blocks[RtsFlags.GcFlags.generations]; - lnat nursery_blocks, allocate_blocks, retainer_blocks, + lnat nursery_blocks, retainer_blocks, arena_blocks, exec_blocks; lnat live_blocks = 0, free_blocks = 0; @@ -1153,11 +1120,6 @@ memInventory(void) } gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); for (s = 0; s < generations[g].n_steps; s++) { -#if !defined(THREADED_RTS) - // We put pinned object blocks in g0s0, so better count - // blocks there too. - if (g==0 && s==0) continue; -#endif stp = &generations[g].steps[s]; gen_blocks[g] += stepBlocks(stp); } @@ -1168,9 +1130,6 @@ memInventory(void) nursery_blocks += stepBlocks(&nurseries[i]); } - /* any blocks held by allocate() */ - allocate_blocks = countAllocdBlocks(small_alloc_list); - retainer_blocks = 0; #ifdef PROFILING if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { @@ -1191,7 +1150,7 @@ memInventory(void) for (g = 0; g < RtsFlags.GcFlags.generations; g++) { live_blocks += gen_blocks[g]; } - live_blocks += nursery_blocks + allocate_blocks + live_blocks += nursery_blocks + + retainer_blocks + arena_blocks + exec_blocks; if (live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK) @@ -1201,7 +1160,6 @@ memInventory(void) debugBelch(" gen %d blocks : %4lu\n", g, gen_blocks[g]); } debugBelch(" nursery : %4lu\n", nursery_blocks); - debugBelch(" allocate() : %4lu\n", allocate_blocks); debugBelch(" retainer : %4lu\n", retainer_blocks); debugBelch(" arena blocks : %4lu\n", arena_blocks); debugBelch(" exec : %4lu\n", exec_blocks); -- 1.7.10.4