From fac738e582dcaca1575f5291c83910db01d25284 Mon Sep 17 00:00:00 2001
From: "simonmar@microsoft.com"
Date: Thu, 28 Feb 2008 15:31:29 +0000
Subject: [PATCH] Release some of the memory allocated to a stack when it
 shrinks (#2090)

When a stack is occupying less than 1/4 of the memory it owns, and is
larger than a megablock, we release half of it.  Shrinking is O(1); it
doesn't need to copy the stack.
---
 includes/Block.h    | 27 +++++++++++++++++---------
 includes/Storage.h  |  3 +++
 rts/Schedule.c      | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 rts/sm/BlockAlloc.c | 34 +++++++++++++++++++++++++++++++++
 rts/sm/Storage.c    | 28 +++++++++++++++++++++++++++
 5 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/includes/Block.h b/includes/Block.h
index 7721765..112092c 100644
--- a/includes/Block.h
+++ b/includes/Block.h
@@ -227,21 +227,30 @@ void freeChain(bdescr *p);
 void freeGroup_lock(bdescr *p);
 void freeChain_lock(bdescr *p);
 
-/* Round a value to megablocks --------------------------------------------- */
+bdescr * splitBlockGroup (bdescr *bd, nat blocks);
 
-#define WORDS_PER_MBLOCK  (BLOCKS_PER_MBLOCK * BLOCK_SIZE_W)
+/* Round a value to megablocks --------------------------------------------- */
 
+// We want to allocate an object of around a given size; round it up or
+// down to the nearest size that will fit in an mblock group.
 INLINE_HEADER StgWord
 round_to_mblocks(StgWord words)
 {
-    if (words > WORDS_PER_MBLOCK) {
-        if ((words % WORDS_PER_MBLOCK) < (WORDS_PER_MBLOCK / 2)) {
-            words = (words / WORDS_PER_MBLOCK) * WORDS_PER_MBLOCK;
-        } else {
-            words = ((words / WORDS_PER_MBLOCK) + 1) * WORDS_PER_MBLOCK;
+    if (words > BLOCKS_PER_MBLOCK * BLOCK_SIZE_W) {
+        // First, ignore the gap at the beginning of the first mblock by
+        // adding it to the total words.  Then we can pretend we're
+        // dealing in a uniform unit of megablocks.
+        words += FIRST_BLOCK_OFF/sizeof(W_);
+
+        if ((words % MBLOCK_SIZE_W) < (MBLOCK_SIZE_W / 2)) {
+            words = (words / MBLOCK_SIZE_W) * MBLOCK_SIZE_W;
+        } else {
+            words = ((words / MBLOCK_SIZE_W) + 1) * MBLOCK_SIZE_W;
+        }
+
+        words -= FIRST_BLOCK_OFF/sizeof(W_);
     }
-    }
-    return words;
+    return words;
 }
 
 #endif /* !CMINUSMINUS */
diff --git a/includes/Storage.h b/includes/Storage.h
index d545054..a830b44 100644
--- a/includes/Storage.h
+++ b/includes/Storage.h
@@ -186,6 +186,9 @@ doYouWantToGC( void )
 extern void *allocateExec (nat bytes);
 extern void freeExec (void *p);
 
+/* for splitting block groups in two */
+extern bdescr * splitLargeBlock (bdescr *bd, nat blocks);
+
 /* -----------------------------------------------------------------------------
    Performing Garbage Collection
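For a concrete feel for the rounding above, here is a standalone sketch of the
same strategy, outside the RTS.  The MBLOCK_SIZE_W and FIRST_BLOCK_OFF values
below are illustrative stand-ins, not GHC's actual constants, and
FIRST_MBLOCK_W approximates the BLOCKS_PER_MBLOCK * BLOCK_SIZE_W threshold:

    #include <stdio.h>

    typedef unsigned long W_;

    /* Stand-in values; the real constants live in GHC's MBlock/Block headers. */
    #define MBLOCK_SIZE_W   ((W_)(1024 * 1024) / sizeof(W_))
    #define FIRST_BLOCK_OFF ((W_)4096)
    #define FIRST_MBLOCK_W  (MBLOCK_SIZE_W - FIRST_BLOCK_OFF / sizeof(W_))

    static W_ round_to_mblocks(W_ words)
    {
        if (words > FIRST_MBLOCK_W) {
            /* Fold the descriptor gap in, so we can think in whole mblocks... */
            words += FIRST_BLOCK_OFF / sizeof(W_);
            /* ...round to the nearest whole megablock... */
            if ((words % MBLOCK_SIZE_W) < (MBLOCK_SIZE_W / 2)) {
                words = (words / MBLOCK_SIZE_W) * MBLOCK_SIZE_W;
            } else {
                words = ((words / MBLOCK_SIZE_W) + 1) * MBLOCK_SIZE_W;
            }
            /* ...then take the gap back out. */
            words -= FIRST_BLOCK_OFF / sizeof(W_);
        }
        return words;
    }

    int main(void)
    {
        /* 1.4 mblocks' worth of words rounds down; 1.6 rounds up. */
        printf("%lu\n", round_to_mblocks((W_)(1.4 * MBLOCK_SIZE_W)));
        printf("%lu\n", round_to_mblocks((W_)(1.6 * MBLOCK_SIZE_W)));
        return 0;
    }

Rounding to the nearest megablock rather than always up keeps the error under
half a megablock in either direction.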
diff --git a/rts/Schedule.c b/rts/Schedule.c
index caa19b2..5fa949c 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -222,6 +222,7 @@ static Capability *scheduleDoGC(Capability *cap, Task *task,
 static rtsBool checkBlackHoles(Capability *cap);
 
 static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
+static StgTSO *threadStackUnderflow(Task *task, StgTSO *tso);
 
 static void deleteThread (Capability *cap, StgTSO *tso);
 static void deleteAllThreads (Capability *cap);
@@ -598,7 +599,7 @@ run_thread:
         // ACTIVITY_DONE_GC means we turned off the timer signal to
         // conserve power (see #1623).  Re-enable it here.
         nat prev;
-        prev = xchg(&recent_activity, ACTIVITY_YES);
+        prev = xchg((P_)&recent_activity, ACTIVITY_YES);
         if (prev == ACTIVITY_DONE_GC) {
             startTimer();
         }
@@ -683,6 +684,8 @@ run_thread:
 
     schedulePostRunThread();
 
+    t = threadStackUnderflow(task,t);
+
     ready_to_gc = rtsFalse;
 
     switch (ret) {
@@ -2805,6 +2808,54 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
     return dest;
 }
 
+static StgTSO *
+threadStackUnderflow (Task *task STG_UNUSED, StgTSO *tso)
+{
+    bdescr *bd, *new_bd;
+    lnat new_tso_size_w, tso_size_w;
+    StgTSO *new_tso;
+
+    tso_size_w = tso_sizeW(tso);
+
+    if (tso_size_w < MBLOCK_SIZE_W ||
+        (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4)
+    {
+        return tso;
+    }
+
+    // don't allow throwTo() to modify the blocked_exceptions queue
+    // while we are moving the TSO:
+    lockClosure((StgClosure *)tso);
+
+    new_tso_size_w = round_to_mblocks(tso_size_w/2);
+
+    debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu",
+               tso->id, tso_size_w, new_tso_size_w);
+
+    bd = Bdescr((StgPtr)tso);
+    new_bd = splitLargeBlock(bd, new_tso_size_w / BLOCK_SIZE_W);
+
+    new_tso = (StgTSO *)new_bd->start;
+    memcpy(new_tso,tso,TSO_STRUCT_SIZE);
+    new_tso->stack_size = new_tso_size_w - TSO_STRUCT_SIZEW;
+
+    tso->what_next = ThreadRelocated;
+    tso->_link = new_tso; // no write barrier reqd: same generation
+
+    // The TSO attached to this Task may have moved, so update the
+    // pointer to it.
+    if (task->tso == tso) {
+        task->tso = new_tso;
+    }
+
+    unlockTSO(new_tso);
+    unlockTSO(tso);
+
+    IF_DEBUG(sanity,checkTSO(new_tso));
+
+    return new_tso;
+}
+
 /* ---------------------------------------------------------------------------
    Interrupt execution
    - usually called inside a signal handler so it mustn't do anything fancy.
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 8b85956..2e8ad73 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -549,6 +549,40 @@ freeChain_lock(bdescr *bd)
     RELEASE_SM_LOCK;
 }
 
+bdescr *
+splitBlockGroup (bdescr *bd, nat blocks)
+{
+    bdescr *new_bd;
+
+    if (bd->blocks <= blocks) {
+        barf("splitBlockGroup: too small");
+    }
+
+    if (bd->blocks > BLOCKS_PER_MBLOCK) {
+        nat mblocks;
+        void *new_mblock;
+        if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
+            barf("splitBlockGroup: not a multiple of a megablock");
+        }
+        mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
+        new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + mblocks * MBLOCK_SIZE_W);
+        initMBlock(new_mblock);
+        new_bd = FIRST_BDESCR(new_mblock);
+        new_bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
+    }
+    else
+    {
+        // NB. we're not updating the bdescrs of the non-head blocks to
+        // point to the new group heads, so this can only be used for
+        // large objects, where the non-head bdescrs are never consulted.
+        new_bd = bd + blocks;
+        new_bd->blocks = bd->blocks - blocks;
+    }
+    bd->blocks = blocks;
+
+    return new_bd;
+}
+
 static void
 initMBlock(void *mblock)
 {
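To make the O(1) claim concrete, here is a toy model of the single-megablock
case of splitBlockGroup: splitting is pure descriptor arithmetic and moves no
data.  The group struct and names below are simplified stand-ins for GHC's
bdescr, not the real type:

    #include <assert.h>
    #include <stdio.h>

    #define BLOCK_SIZE_W 512            /* stand-in block size, in words */

    typedef struct {
        unsigned long start;            /* pretend address of the first word */
        unsigned int  blocks;           /* number of blocks in the group */
    } group;

    /* Keep the first 'blocks' blocks in *g; return a descriptor for the rest. */
    static group split_group(group *g, unsigned int blocks)
    {
        group rest;
        assert(blocks < g->blocks);     /* mirrors the barf() in the patch */
        rest.start  = g->start + (unsigned long)blocks * BLOCK_SIZE_W;
        rest.blocks = g->blocks - blocks;
        g->blocks   = blocks;           /* O(1): no block contents are copied */
        return rest;
    }

    int main(void)
    {
        group g = { 0, 64 };
        group rest = split_group(&g, 16);
        printf("kept %u blocks, released %u blocks at word offset %lu\n",
               g.blocks, rest.blocks, rest.start);
        return 0;
    }

Only descriptors change hands, which is what lets threadStackUnderflow shrink
a stack without copying it.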
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index a07685b..58cd766 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -633,6 +633,34 @@ allocatedBytes( void )
     return allocated;
 }
 
+// split N blocks off the start of the given bdescr, returning the
+// remainder as a new block group.  We treat the remainder as if it
+// had been freshly allocated in generation 0.
+bdescr *
+splitLargeBlock (bdescr *bd, nat blocks)
+{
+    bdescr *new_bd;
+
+    // subtract the original number of blocks from the counter first
+    bd->step->n_large_blocks -= bd->blocks;
+
+    new_bd = splitBlockGroup (bd, blocks);
+
+    dbl_link_onto(new_bd, &g0s0->large_objects);
+    g0s0->n_large_blocks += new_bd->blocks;
+    new_bd->gen_no = g0s0->no;
+    new_bd->step = g0s0;
+    new_bd->flags = BF_LARGE;
+    new_bd->free = bd->free;
+
+    // add the new number of blocks to the counter.  Due to the gaps for
+    // block descriptors, new_bd->blocks + bd->blocks might not be equal
+    // to the original bd->blocks, which is why we do it this way.
+    bd->step->n_large_blocks += bd->blocks;
+
+    return new_bd;
+}
+
 /* -----------------------------------------------------------------------------
    allocateLocal()
-- 
1.7.10.4
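Tying the pieces together, the following sketch isolates the shrink policy
that threadStackUnderflow applies, separated from the RTS types: shrink only
when the TSO spans at least a megablock and under a quarter of the stack is in
use, then release roughly half, rounded to whole megablocks.  The constant and
helper names are illustrative stand-ins, and the descriptor-gap correction
from round_to_mblocks is elided:

    #include <stdio.h>

    typedef unsigned long W_;

    #define MBLOCK_SIZE_W ((W_)(1024 * 1024) / sizeof(W_))  /* stand-in value */

    /* Round to the nearest whole megablock (descriptor-gap fix-up elided). */
    static W_ round_mb(W_ words)
    {
        W_ down = (words / MBLOCK_SIZE_W) * MBLOCK_SIZE_W;
        return (words - down < MBLOCK_SIZE_W / 2) ? down : down + MBLOCK_SIZE_W;
    }

    /* Decide the new TSO size from its total size, its stack size, and the
       words of stack currently in use (all in words). */
    static W_ shrink_target(W_ tso_size_w, W_ stack_size_w, W_ used_w)
    {
        if (tso_size_w < MBLOCK_SIZE_W || used_w > stack_size_w / 4) {
            return tso_size_w;          /* too small, or too full, to shrink */
        }
        return round_mb(tso_size_w / 2);
    }

    int main(void)
    {
        W_ mb = MBLOCK_SIZE_W;
        /* a 4-mblock TSO with a mostly empty stack shrinks to 2 mblocks */
        printf("%lu -> %lu\n", 4 * mb, shrink_target(4 * mb, 4 * mb - 64, mb / 8));
        return 0;
    }

The quarter-full threshold leaves plenty of headroom, so a thread whose stack
oscillates in depth does not bounce between growing and shrinking on every
scheduler pass.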