From: Simon Marlow
Date: Sun, 9 May 2010 19:49:28 +0000 (+0000)
Subject: Small optimisation: allocate nursery blocks contiguously
X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=cebecc110be6e53c310326940a311cab7fad84ca

Small optimisation: allocate nursery blocks contiguously

This lets automatic prefetching work better, for a tiny performance boost
---

diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 34cdab1..2e43bff 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -332,31 +332,45 @@ static bdescr *
 allocNursery (bdescr *tail, nat blocks)
 {
     bdescr *bd;
-    nat i;
+    nat i, n;
 
-    // Allocate a nursery: we allocate fresh blocks one at a time and
-    // cons them on to the front of the list, not forgetting to update
-    // the back pointer on the tail of the list to point to the new block.
-    for (i=0; i < blocks; i++) {
-        // @LDV profiling
-        /*
-          processNursery() in LdvProfile.c assumes that every block group in
-          the nursery contains only a single block. So, if a block group is
-          given multiple blocks, change processNursery() accordingly.
-        */
-        bd = allocBlock();
-        bd->link = tail;
-        // double-link the nursery: we might need to insert blocks
-        if (tail != NULL) {
-            tail->u.back = bd;
-        }
-        initBdescr(bd, g0, g0);
-        bd->flags = 0;
-        bd->free = bd->start;
-        tail = bd;
+    // We allocate the nursery as a single contiguous block and then
+    // divide it into single blocks manually. This way we guarantee
+    // that the nursery blocks are adjacent, so that the processor's
+    // automatic prefetching works across nursery blocks. This is a
+    // tiny optimisation (~0.5%), but it's free.
+
+    while (blocks > 0) {
+        n = stg_min(blocks, BLOCKS_PER_MBLOCK);
+        blocks -= n;
+
+        bd = allocGroup(n);
+        for (i = 0; i < n; i++) {
+            initBdescr(&bd[i], g0, g0);
+
+            bd[i].blocks = 1;
+            bd[i].flags = 0;
+
+            if (i > 0) {
+                bd[i].u.back = &bd[i-1];
+            }
+
+            if (i+1 < n) {
+                bd[i].link = &bd[i+1];
+            } else {
+                bd[i].link = tail;
+                if (tail != NULL) {
+                    tail->u.back = &bd[i];
+                }
+            }
+
+            bd[i].free = bd[i].start;
+        }
+
+        tail = &bd[0];
     }
-    tail->u.back = NULL;
-    return tail;
+
+    return &bd[0];
 }
 
 static void