X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FStorage.c;h=3ede82d14157bc4d6e2e223f61c0f54a095e7e85;hb=8604da0136707cc14845d14a88c2272fe576b6d0;hp=fba30bbc16f4d8ef816ce37508d70ec4ba4599ad;hpb=d12cd2f2c930773a34781b942b8248a3818eeae8;p=ghc-hetmet.git diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index fba30bb..3ede82d 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -40,14 +40,10 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *small_alloc_list; /* allocate()d small objects */ bdescr *pinned_object_block; /* allocate pinned objects into this block */ nat alloc_blocks; /* number of allocate()d blocks since GC */ nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ -StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */ -StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */ - generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ @@ -115,10 +111,13 @@ initStorage( void ) return; } + initMBlocks(); + /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be * doing something reasonable. */ - ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info)); + /* We use the NOT_NULL variant or gcc warns that the test is always true */ + ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure)); ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure)); @@ -188,12 +187,11 @@ initStorage( void ) #ifdef THREADED_RTS n_nurseries = n_capabilities; - nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), - "initStorage: nurseries"); #else n_nurseries = 1; - nurseries = g0->steps; // just share nurseries[0] with g0s0 -#endif +#endif + nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), + "initStorage: nurseries"); /* Initialise all steps */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) { @@ -202,11 +200,9 @@ initStorage( void ) } } -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { initStep(&nurseries[s], 0, s); } -#endif /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { @@ -217,11 +213,9 @@ initStorage( void ) } oldest_gen->steps[0].to = &oldest_gen->steps[0]; -#ifdef THREADED_RTS for (s = 0; s < n_nurseries; s++) { nurseries[s].to = generations[0].steps[0].to; } -#endif /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact) { @@ -232,24 +226,15 @@ initStorage( void ) } } -#ifdef THREADED_RTS - if (RtsFlags.GcFlags.generations == 1) { - errorBelch("-G1 is incompatible with -threaded"); - stg_exit(EXIT_FAILURE); - } -#endif - - /* generation 0 is special: that's the nursery */ generations[0].max_blocks = 0; + g0s0 = &generations[0].steps[0]; - /* G0S0: the allocation area. Policy: keep the allocation area + /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap * size. Reason: we're going to do a major collection first, and we * don't want it to be a big one. This vague idea is borne out by * rigorous experimental evidence. */ - g0s0 = &generations[0].steps[0]; - allocNurseries(); weak_ptr_list = NULL; @@ -257,7 +242,6 @@ initStorage( void ) revertible_caf_list = NULL; /* initialise the allocate() interface */ - small_alloc_list = NULL; alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; @@ -288,6 +272,7 @@ freeStorage (void) closeMutex(&sm_mutex); closeMutex(&atomic_modify_mutvar_mutex); #endif + stgFree(nurseries); } /* ----------------------------------------------------------------------------- @@ -556,57 +541,75 @@ resizeNurseries (nat blocks) /* ----------------------------------------------------------------------------- The allocate() interface - allocate(n) always succeeds, and returns a chunk of memory n words - long. n can be larger than the size of a block if necessary, in - which case a contiguous block group will be allocated. + allocateInGen() function allocates memory directly into a specific + generation. It always succeeds, and returns a chunk of memory n + words long. n can be larger than the size of a block if necessary, + in which case a contiguous block group will be allocated. + + allocate(n) is equivalent to allocateInGen(g0). -------------------------------------------------------------------------- */ StgPtr -allocate( nat n ) +allocateInGen (generation *g, nat n) { + step *stp; bdescr *bd; - StgPtr p; + StgPtr ret; ACQUIRE_SM_LOCK; - + TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + stp = &g->steps[0]; + + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) + { nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + + // Attempting to allocate an object larger than maxHeapSize + // should definitely be disallowed. (bug #1791) + if (RtsFlags.GcFlags.maxHeapSize > 0 && + req_blocks >= RtsFlags.GcFlags.maxHeapSize) { + heapOverflow(); + } + bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += bd->blocks; // might be larger than req_blocks - bd->gen_no = 0; - bd->step = g0s0; + dbl_link_onto(bd, &stp->large_objects); + stp->n_large_blocks += bd->blocks; // might be larger than req_blocks + bd->gen_no = g->no; + bd->step = stp; bd->flags = BF_LARGE; bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; - } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; + ret = bd->start; } - - p = alloc_Hp; - alloc_Hp += n; + else + { + // small allocation (blocks; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + bd = allocBlock(); + bd->gen_no = g->no; + bd->step = stp; + bd->flags = 0; + bd->link = stp->blocks; + stp->blocks = bd; + stp->n_blocks++; + alloc_blocks++; + } + ret = bd->free; + bd->free += n; + } + RELEASE_SM_LOCK; - return p; + + return ret; +} + +StgPtr +allocate (nat n) +{ + return allocateInGen(g0,n); } lnat @@ -614,7 +617,7 @@ allocatedBytes( void ) { lnat allocated; - allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp); + allocated = alloc_blocks * BLOCK_SIZE_W; if (pinned_object_block != NULL) { allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - pinned_object_block->free; @@ -623,16 +626,6 @@ allocatedBytes( void ) return allocated; } -void -tidyAllocateLists (void) -{ - if (small_alloc_list != NULL) { - ASSERT(alloc_Hp >= small_alloc_list->start && - alloc_Hp <= small_alloc_list->start + BLOCK_SIZE); - small_alloc_list->free = alloc_Hp; - } -} - /* ----------------------------------------------------------------------------- allocateLocal() @@ -652,59 +645,48 @@ allocateLocal (Capability *cap, nat n) bdescr *bd; StgPtr p; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - ACQUIRE_SM_LOCK; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += bd->blocks; // might be larger than req_blocks - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; - - /* small allocation (r.rCurrentAlloc; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + /* small allocation (r.rCurrentNursery->link; - - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - // The nursery is empty, or the next block is already - // full: allocate a fresh block (we can't fail here). - ACQUIRE_SM_LOCK; - bd = allocBlock(); - cap->r.rNursery->n_blocks++; - RELEASE_SM_LOCK; - bd->gen_no = 0; - bd->step = cap->r.rNursery; - bd->flags = 0; - } else { - // we have a block in the nursery: take it and put - // it at the *front* of the nursery list, and use it - // to allocate() from. - cap->r.rCurrentNursery->link = bd->link; - if (bd->link != NULL) { - bd->link->u.back = cap->r.rCurrentNursery; - } - } - dbl_link_onto(bd, &cap->r.rNursery->blocks); - cap->r.rCurrentAlloc = bd; - IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); - } + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + bd = cap->r.rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = cap->r.rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + cap->r.rNursery->n_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = cap->r.rNursery; + bd->flags = 0; + // NO: alloc_blocks++; + // calcAllocated() uses the size of the nursery, and we've + // already bumpted nursery->n_blocks above. + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + cap->r.rCurrentNursery->link = bd->link; + if (bd->link != NULL) { + bd->link->u.back = cap->r.rCurrentNursery; + } + } + dbl_link_onto(bd, &cap->r.rNursery->blocks); + cap->r.rCurrentAlloc = bd; + IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); } p = bd->free; bd->free += n; @@ -778,12 +760,15 @@ allocatePinned( nat n ) } /* ----------------------------------------------------------------------------- + Write Barriers + -------------------------------------------------------------------------- */ + +/* This is the write barrier for MUT_VARs, a.k.a. IORefs. A MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY and is put on the mutable list. - -------------------------------------------------------------------------- */ - +*/ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) { @@ -796,6 +781,23 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) } } +/* + This is the write barrier for MVARs. An MVAR_CLEAN objects is not + on the mutable list; a MVAR_DIRTY is. When written to, a + MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list. + The check for MVAR_CLEAN is inlined at the call site for speed, + this really does make a difference on concurrency-heavy benchmarks + such as Chaneneos and cheap-concurrency. +*/ +void +dirty_MVAR(StgRegTable *reg, StgClosure *p) +{ + Capability *cap = regTableToCapability(reg); + bdescr *bd; + bd = Bdescr((StgPtr)p); + if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no); +} + /* ----------------------------------------------------------------------------- Allocation functions for GMP. @@ -1045,20 +1047,17 @@ void freeExec (void *addr) bd->gen_no -= *(StgPtr)p; *(StgPtr)p = 0; - // Free the block if it is empty, but not if it is the block at - // the head of the queue. - if (bd->gen_no == 0 && bd != exec_block) { - debugTrace(DEBUG_gc, "free exec block %p", bd->start); - if (bd->u.back) { - bd->u.back->link = bd->link; - } else { - exec_block = bd->link; - } - if (bd->link) { - bd->link->u.back = bd->u.back; - } - setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); - freeGroup(bd); + if (bd->gen_no == 0) { + // Free the block if it is empty, but not if it is the block at + // the head of the queue. + if (bd != exec_block) { + debugTrace(DEBUG_gc, "free exec block %p", bd->start); + dbl_link_remove(bd, &exec_block); + setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); + freeGroup(bd); + } else { + bd->free = bd->start; + } } RELEASE_SM_LOCK @@ -1119,7 +1118,7 @@ memInventory(void) nat g, s, i; step *stp; lnat gen_blocks[RtsFlags.GcFlags.generations]; - lnat nursery_blocks, allocate_blocks, retainer_blocks, + lnat nursery_blocks, retainer_blocks, arena_blocks, exec_blocks; lnat live_blocks = 0, free_blocks = 0; @@ -1132,11 +1131,6 @@ memInventory(void) } gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); for (s = 0; s < generations[g].n_steps; s++) { -#if !defined(THREADED_RTS) - // We put pinned object blocks in g0s0, so better count - // blocks there too. - if (g==0 && s==0) continue; -#endif stp = &generations[g].steps[s]; gen_blocks[g] += stepBlocks(stp); } @@ -1147,9 +1141,6 @@ memInventory(void) nursery_blocks += stepBlocks(&nurseries[i]); } - /* any blocks held by allocate() */ - allocate_blocks = countAllocdBlocks(small_alloc_list); - retainer_blocks = 0; #ifdef PROFILING if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { @@ -1170,7 +1161,7 @@ memInventory(void) for (g = 0; g < RtsFlags.GcFlags.generations; g++) { live_blocks += gen_blocks[g]; } - live_blocks += nursery_blocks + allocate_blocks + live_blocks += nursery_blocks + + retainer_blocks + arena_blocks + exec_blocks; if (live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK) @@ -1180,7 +1171,6 @@ memInventory(void) debugBelch(" gen %d blocks : %4lu\n", g, gen_blocks[g]); } debugBelch(" nursery : %4lu\n", nursery_blocks); - debugBelch(" allocate() : %4lu\n", allocate_blocks); debugBelch(" retainer : %4lu\n", retainer_blocks); debugBelch(" arena blocks : %4lu\n", arena_blocks); debugBelch(" exec : %4lu\n", exec_blocks);