From: simonmar
Date: Tue, 10 May 2005 13:25:43 +0000 (+0000)
Subject: [project @ 2005-05-10 13:25:41 by simonmar]
X-Git-Tag: Initial_conversion_from_CVS_complete~579
X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=bf8219815876579f80b4e30188b982bf3e673199;p=ghc-hetmet.git

[project @ 2005-05-10 13:25:41 by simonmar]

Two SMP-related changes:

 - New storage manager interface:

     bdescr *allocateLocal(StgRegTable *reg, nat words)

   which allocates from the current thread's nursery (being careful not
   to clash with the heap pointer). It can do this without taking any
   locks; the lock only has to be taken if a block needs to be
   allocated. allocateLocal() is now used instead of allocate() in a few
   PrimOps. This removes locks from most Integer operations, cutting
   down the overhead for SMP a bit more.

   To make this work, we have to be able to grab the current thread's
   Capability out of thin air (i.e. when called from GMP), so the
   Capability subsystem needs to keep a hash from thread IDs to
   Capabilities.

 - Small MVar optimisation: instead of taking the global storage-manager
   lock, do our own locking of MVars with a bit of inline assembly (x86
   only for now).
---

diff --git a/ghc/includes/Cmm.h b/ghc/includes/Cmm.h
index e989a00..415dc4c 100644
--- a/ghc/includes/Cmm.h
+++ b/ghc/includes/Cmm.h
@@ -317,8 +317,11 @@
   HP_CHK_GEN(alloc,liveness,reentry); \
   TICK_ALLOC_HEAP_NOCTR(alloc);
 
+// allocateLocal() allocates from the nursery, so we check to see
+// whether the nursery is nearly empty in any function that uses
+// allocateLocal() - this includes many of the primops.
 #define MAYBE_GC(liveness,reentry) \
-  if (CInt[alloc_blocks] >= CInt[alloc_blocks_lim]) { \
+  if (bdescr_link(CurrentNursery) == NULL) { \
     R9 = liveness; \
     R10 = reentry; \
     jump stg_gc_gen_hp; \
diff --git a/ghc/includes/Regs.h b/ghc/includes/Regs.h
index 0203238..5374972 100644
--- a/ghc/includes/Regs.h
+++ b/ghc/includes/Regs.h
@@ -87,7 +87,8 @@ typedef struct StgRegTable_ {
   StgPtr rHpLim;
   struct StgTSO_ *rCurrentTSO;
   struct step_ *rNursery;
-  struct bdescr_ *rCurrentNursery;
+  struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */
+  struct bdescr_ *rCurrentAlloc;   /* for allocation using allocate() */
   StgWord rHpAlloc;  /* number of *bytes* being allocated in heap */
 #if defined(SMP) || defined(PAR)
   StgSparkPool rSparks;    /* per-task spark pool */
diff --git a/ghc/includes/Rts.h b/ghc/includes/Rts.h
index fb2a70b..27331be 100644
--- a/ghc/includes/Rts.h
+++ b/ghc/includes/Rts.h
@@ -110,13 +110,8 @@ extern void _assertFail (char *, unsigned int);
 #include "Parallel.h"
 
 /* STG/Optimised-C related stuff */
-#include "SMP.h"
 #include "Block.h"
 
-#ifdef SMP
-#include
-#endif
-
 /* GNU mp library */
 #include "gmp.h"
 
diff --git a/ghc/includes/SMP.h b/ghc/includes/SMP.h
index e35b95b..86930f9 100644
--- a/ghc/includes/SMP.h
+++ b/ghc/includes/SMP.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------------
  *
- * (c) The GHC Team, 1999
+ * (c) The GHC Team, 2005
  *
  * Macros for SMP support
  *
@@ -23,60 +23,37 @@
 #error Build options incompatible with SMP.
 #endif
 
-/*
- * CMPXCHG - this instruction is the standard "test & set". We use it
- * for locking closures in the thunk and blackhole entry code. If the
- * closure is already locked, or has an unexpected info pointer
- * (because another thread is altering it in parallel), we just jump
- * to the new entry point.
- */ -#if defined(i386_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE) -#define CMPXCHG(p, cmp, new) \ - __asm__ __volatile__ ( \ - "lock ; cmpxchg %1, %0\n" \ - "\tje 1f\n" \ - "\tjmp *%%eax\n" \ - "\t1:\n" \ - : /* no outputs */ \ - : "m" (p), "r" (new), "r" (cmp) \ - ) - /* * XCHG - the atomic exchange instruction. Used for locking closures * during updates (see LOCK_CLOSURE below) and the MVar primops. */ -#define XCHG(reg, obj) \ - __asm__ __volatile__ ( \ - "xchgl %1,%0" \ - :"+r" (reg), "+m" (obj) \ - : /* no input-only operands */ \ - ) - +INLINE_HEADER StgWord +xchg(StgPtr p, StgWord w) +{ + StgWord result; + result = w; + __asm__ __volatile__ ( + "xchgl %1,%0" + :"+r" (result), "+m" (*p) + : /* no input-only operands */ + ); + return result; +} + +INLINE_HEADER StgInfoTable * +lockClosure(StgClosure *p) +{ + StgWord info; +#if 0 + do { + info = xchg((P_)&p->header.info, (W_)&stg_WHITEHOLE_info); + if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info; + yieldThread(); + } while (1); #else -#error SMP macros not defined for this architecture + info = p->header.info; #endif - -/* - * LOCK_CLOSURE locks the specified closure, busy waiting for any - * existing locks to be cleared. - */ -#define LOCK_CLOSURE(c) \ - ({ \ - const StgInfoTable *__info; \ - __info = &stg_WHITEHOLE_info; \ - do { \ - XCHG(__info,((StgClosure *)(c))->header.info); \ - } while (__info == &stg_WHITEHOLE_info); \ - __info; \ - }) - -#define LOCK_THUNK(__info) \ - CMPXCHG(R1.cl->header.info, __info, &stg_WHITEHOLE_info); - -#else /* !SMP */ - -#define LOCK_CLOSURE(c) /* nothing */ -#define LOCK_THUNK(__info) /* nothing */ +} #endif /* SMP */ diff --git a/ghc/includes/StgMiscClosures.h b/ghc/includes/StgMiscClosures.h index f8332aa..026c2cf 100644 --- a/ghc/includes/StgMiscClosures.h +++ b/ghc/includes/StgMiscClosures.h @@ -95,6 +95,7 @@ RTS_INFO(stg_IND_OLDGEN_info); RTS_INFO(stg_IND_OLDGEN_PERM_info); RTS_INFO(stg_CAF_UNENTERED_info); RTS_INFO(stg_CAF_ENTERED_info); +RTS_INFO(stg_WHITEHOLE_info); RTS_INFO(stg_BLACKHOLE_info); RTS_INFO(stg_CAF_BLACKHOLE_info); #ifdef TICKY_TICKY @@ -155,6 +156,7 @@ RTS_ENTRY(stg_IND_OLDGEN_entry); RTS_ENTRY(stg_IND_OLDGEN_PERM_entry); RTS_ENTRY(stg_CAF_UNENTERED_entry); RTS_ENTRY(stg_CAF_ENTERED_entry); +RTS_ENTRY(stg_WHITEHOLE_entry); RTS_ENTRY(stg_BLACKHOLE_entry); RTS_ENTRY(stg_CAF_BLACKHOLE_entry); #ifdef TICKY_TICKY diff --git a/ghc/includes/Storage.h b/ghc/includes/Storage.h index a8a6d24..0ef5785 100644 --- a/ghc/includes/Storage.h +++ b/ghc/includes/Storage.h @@ -139,6 +139,7 @@ extern void exitStorage(void); -------------------------------------------------------------------------- */ extern StgPtr allocate ( nat n ); +extern StgPtr allocateLocal ( StgRegTable *reg, nat n ); extern StgPtr allocatePinned ( nat n ); extern lnat allocated_bytes ( void ); @@ -193,6 +194,9 @@ extern void GarbageCollect(void (*get_roots)(evac_fn),rtsBool force_major_gc); */ #if defined(SMP) extern Mutex sm_mutex; +#endif + +#if defined(SMP) #define ACQUIRE_SM_LOCK ACQUIRE_LOCK(&sm_mutex); #define RELEASE_SM_LOCK RELEASE_LOCK(&sm_mutex); #else diff --git a/ghc/rts/Capability.c b/ghc/rts/Capability.c index 1e2d3d6..0ae4688 100644 --- a/ghc/rts/Capability.c +++ b/ghc/rts/Capability.c @@ -22,6 +22,9 @@ #include "OSThreads.h" #include "Capability.h" #include "Schedule.h" /* to get at EMPTY_RUN_QUEUE() */ +#if defined(SMP) +#include "Hash.h" +#endif #if !defined(SMP) Capability MainCapability; /* for non-SMP, we have one global capability */ @@ -81,6 +84,11 @@ static rtsBool 
passingCapability = rtsFalse; * Free capability list. */ Capability *free_capabilities; + +/* + * Maps OSThreadId to Capability * + */ +HashTable *capability_hash; #endif #ifdef SMP @@ -133,6 +141,8 @@ initCapabilities( void ) free_capabilities = &capabilities[0]; rts_n_free_capabilities = n; + capability_hash = allocHashTable(); + IF_DEBUG(scheduler, sched_belch("allocated %d capabilities", n)); #else capabilities = &MainCapability; @@ -164,6 +174,7 @@ grabCapability( Capability** cap ) *cap = free_capabilities; free_capabilities = (*cap)->link; rts_n_free_capabilities--; + insertHashTable(capability_hash, osThreadId(), *cap); #else # if defined(RTS_SUPPORTS_THREADS) ASSERT(rts_n_free_capabilities == 1); @@ -177,6 +188,23 @@ grabCapability( Capability** cap ) } /* ---------------------------------------------------------------------------- + * Function: myCapability(void) + * + * Purpose: Return the capability owned by the current thread. + * Should not be used if the current thread does not + * hold a Capability. + * ------------------------------------------------------------------------- */ +Capability * +myCapability (void) +{ +#if defined(SMP) + return lookupHashTable(capability_hash, osThreadId()); +#else + return &MainCapability; +#endif +} + +/* ---------------------------------------------------------------------------- * Function: releaseCapability(Capability*) * * Purpose: Letting go of a capability. Causes a @@ -195,6 +223,8 @@ releaseCapability( Capability* cap UNUSED_IF_NOT_SMP ) #if defined(SMP) cap->link = free_capabilities; free_capabilities = cap; + ASSERT(myCapability() == cap); + removeHashTable(capability_hash, osThreadId(), NULL); #endif // Check to see whether a worker thread can be given // the go-ahead to return the result of an external call.. diff --git a/ghc/rts/Capability.h b/ghc/rts/Capability.h index 21d4ce4..f1615dc 100644 --- a/ghc/rts/Capability.h +++ b/ghc/rts/Capability.h @@ -38,6 +38,10 @@ extern void releaseCapability( Capability* cap ); // extern void threadRunnable ( void ); +// Return the capability that I own. 
+// +extern Capability *myCapability (void); + extern void prodWorker ( void ); #ifdef RTS_SUPPORTS_THREADS diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index db05ef5..fce011a 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -739,7 +739,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (stp->is_compacted) { collected += (oldgen_saved_blocks - stp->n_blocks) * BLOCK_SIZE_W; } else { - collected += stp->n_blocks * BLOCK_SIZE_W; + if (g == 0 && s == 0) { + collected += countNurseryBlocks() * BLOCK_SIZE_W; + collected += alloc_blocks; + } else { + collected += stp->n_blocks * BLOCK_SIZE_W; + } } /* free old memory and shift to-space into from-space for all diff --git a/ghc/rts/Makefile b/ghc/rts/Makefile index b564849..802ce61 100644 --- a/ghc/rts/Makefile +++ b/ghc/rts/Makefile @@ -319,7 +319,8 @@ SRC_HC_OPTS += \ -\#include LdvProfile.h \ -\#include Profiling.h \ -\#include OSThreads.h \ - -\#include Apply.h + -\#include Apply.h \ + -\#include SMP.h ifeq "$(Windows)" "YES" PrimOps_HC_OPTS += -\#include '' -\#include win32/AsyncIO.h diff --git a/ghc/rts/PrimOps.cmm b/ghc/rts/PrimOps.cmm index ff1b442..cdca634 100644 --- a/ghc/rts/PrimOps.cmm +++ b/ghc/rts/PrimOps.cmm @@ -49,7 +49,7 @@ newByteArrayzh_fast n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - "ptr" p = foreign "C" allocate(words); + "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -97,7 +97,7 @@ newArrayzh_fast MAYBE_GC(R2_PTR,newArrayzh_fast); words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - "ptr" arr = foreign "C" allocate(words); + "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]); @@ -1429,14 +1429,14 @@ takeMVarzh_fast { W_ mvar, val, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar closure */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif /* If the MVar is empty, put ourselves on its blocking queue, * and wait until we're woken up. @@ -1453,7 +1453,7 @@ takeMVarzh_fast StgMVar_tail(mvar) = CurrentTSO; #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_EMPTY_MVAR_info); #endif jump stg_block_takemvar; @@ -1486,7 +1486,7 @@ takeMVarzh_fast } #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_FULL_MVAR_info); #endif RET_P(val); @@ -1494,16 +1494,10 @@ takeMVarzh_fast else { /* No further putMVars, MVar is now empty */ - - /* do this last... we might have locked the MVar in the SMP case, - * and writing the info pointer will unlock it. 
- */ - SET_INFO(mvar,stg_EMPTY_MVAR_info); StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; - -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif + + /* unlocks the closure in the SMP case */ + SET_INFO(mvar,stg_EMPTY_MVAR_info); RET_P(val); } @@ -1514,23 +1508,23 @@ tryTakeMVarzh_fast { W_ mvar, val, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar closure */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif if (info == stg_EMPTY_MVAR_info) { +#if defined(SMP) + SET_INFO(mvar,stg_EMPTY_MVAR_info); +#endif /* HACK: we need a pointer to pass back, * so we abuse NO_FINALIZER_closure */ -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif RET_NP(0, stg_NO_FINALIZER_closure); } @@ -1559,6 +1553,9 @@ tryTakeMVarzh_fast if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } +#if defined(SMP) + SET_INFO(mvar,stg_FULL_MVAR_info); +#endif } else { @@ -1567,10 +1564,6 @@ tryTakeMVarzh_fast SET_INFO(mvar,stg_EMPTY_MVAR_info); } -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif - RET_NP(1, val); } @@ -1579,14 +1572,14 @@ putMVarzh_fast { W_ mvar, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar, R2 = value */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif if (info == stg_FULL_MVAR_info) { if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) { @@ -1600,7 +1593,7 @@ putMVarzh_fast StgMVar_tail(mvar) = CurrentTSO; #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_FULL_MVAR_info); #endif jump stg_block_putmvar; } @@ -1628,7 +1621,7 @@ putMVarzh_fast } #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_EMPTY_MVAR_info); #endif jump %ENTRY_CODE(Sp(0)); } @@ -1639,9 +1632,6 @@ putMVarzh_fast /* unlocks the MVar in the SMP case */ SET_INFO(mvar,stg_FULL_MVAR_info); -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif jump %ENTRY_CODE(Sp(0)); } @@ -1653,18 +1643,18 @@ tryPutMVarzh_fast { W_ mvar, info, tso; -#if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); -#endif - /* args: R1 = MVar, R2 = value */ mvar = R1; +#if defined(SMP) + "ptr" info = foreign "C" lockClosure(mvar "ptr"); +#else info = GET_INFO(mvar); +#endif if (info == stg_FULL_MVAR_info) { #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_FULL_MVAR_info); #endif RET_N(0); } @@ -1692,7 +1682,7 @@ tryPutMVarzh_fast } #if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); + SET_INFO(mvar,stg_EMPTY_MVAR_info); #endif jump %ENTRY_CODE(Sp(0)); } @@ -1702,9 +1692,7 @@ tryPutMVarzh_fast StgMVar_value(mvar) = R2; /* unlocks the MVar in the SMP case */ SET_INFO(mvar,stg_FULL_MVAR_info); -#if defined(SMP) - foreign "C" RELEASE_LOCK(sm_mutex "ptr"); -#endif + jump %ENTRY_CODE(Sp(0)); } diff --git a/ghc/rts/StgMiscClosures.cmm b/ghc/rts/StgMiscClosures.cmm index 4e2c0fb..15f27d6 100644 --- a/ghc/rts/StgMiscClosures.cmm +++ b/ghc/rts/StgMiscClosures.cmm @@ -419,6 +419,12 @@ INFO_TABLE(stg_SE_CAF_BLACKHOLE,0,1,SE_CAF_BLACKHOLE,"SE_CAF_BLACKHOLE","SE_CAF_ #endif /* ---------------------------------------------------------------------------- + ------------------------------------------------------------------------- */ + +INFO_TABLE(stg_WHITEHOLE, 0,0, INVALID_OBJECT, "WHITEHOLE", "WHITEHOLE") +{ 
foreign "C" barf("WHITEHOLE object entered!"); } + +/* ---------------------------------------------------------------------------- Some static info tables for things that don't get entered, and therefore don't need entry code (i.e. boxed but unpointed objects) NON_ENTERABLE_ENTRY_CODE now defined at the beginning of the file diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c index f466a58..7e07ff2 100644 --- a/ghc/rts/Storage.c +++ b/ghc/rts/Storage.c @@ -26,6 +26,9 @@ #include #include +/* + * All these globals require sm_mutex to access in SMP mode. + */ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; @@ -405,10 +408,12 @@ assignNurseriesToCapabilities (void) for (i = 0; i < n_nurseries; i++) { capabilities[i].r.rNursery = &nurseries[i]; capabilities[i].r.rCurrentNursery = nurseries[i].blocks; + capabilities[i].r.rCurrentAlloc = NULL; } #else /* SMP */ MainCapability.r.rNursery = &nurseries[0]; MainCapability.r.rCurrentNursery = nurseries[0].blocks; + MainCapability.r.rCurrentAlloc = NULL; #endif } @@ -534,49 +539,49 @@ resizeNurseries (nat blocks) StgPtr allocate( nat n ) { - bdescr *bd; - StgPtr p; + bdescr *bd; + StgPtr p; - ACQUIRE_SM_LOCK; + ACQUIRE_SM_LOCK; - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - /* big allocation (>LARGE_OBJECT_THRESHOLD) */ - /* ToDo: allocate directly into generation 1 */ - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks += req_blocks; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_LARGE; - bd->free = bd->start + n; - alloc_blocks += req_blocks; - RELEASE_SM_LOCK; - return bd->start; + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); - /* small allocation ( alloc_HpLim) { - if (small_alloc_list) { - small_alloc_list->free = alloc_Hp; + /* big allocation (>LARGE_OBJECT_THRESHOLD) */ + /* ToDo: allocate directly into generation 1 */ + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + bd = allocGroup(req_blocks); + dbl_link_onto(bd, &g0s0->large_objects); + g0s0->n_large_blocks += req_blocks; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = BF_LARGE; + bd->free = bd->start + n; + alloc_blocks += req_blocks; + RELEASE_SM_LOCK; + return bd->start; + + /* small allocation ( alloc_HpLim) { + if (small_alloc_list) { + small_alloc_list->free = alloc_Hp; + } + bd = allocBlock(); + bd->link = small_alloc_list; + small_alloc_list = bd; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = 0; + alloc_Hp = bd->start; + alloc_HpLim = bd->start + BLOCK_SIZE_W; + alloc_blocks++; } - bd = allocBlock(); - bd->link = small_alloc_list; - small_alloc_list = bd; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = 0; - alloc_Hp = bd->start; - alloc_HpLim = bd->start + BLOCK_SIZE_W; - alloc_blocks++; - } - - p = alloc_Hp; - alloc_Hp += n; - RELEASE_SM_LOCK; - return p; + + p = alloc_Hp; + alloc_Hp += n; + RELEASE_SM_LOCK; + return p; } lnat @@ -603,6 +608,82 @@ tidyAllocateLists (void) } } +/* ----------------------------------------------------------------------------- + allocateLocal() + + This allocates memory in the current thread - it is intended for + use primarily from STG-land where we have a Capability. It is + better than allocate() because it doesn't require taking the + sm_mutex lock in the common case. 
+ + Memory is allocated directly from the nursery if possible (but not + from the current nursery block, so as not to interfere with + Hp/HpLim). + -------------------------------------------------------------------------- */ + +StgPtr +allocateLocal( StgRegTable *reg, nat n ) +{ + bdescr *bd; + StgPtr p; + + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + + /* big allocation (>LARGE_OBJECT_THRESHOLD) */ + /* ToDo: allocate directly into generation 1 */ + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { + nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + ACQUIRE_SM_LOCK; + bd = allocGroup(req_blocks); + dbl_link_onto(bd, &g0s0->large_objects); + g0s0->n_large_blocks += req_blocks; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = BF_LARGE; + bd->free = bd->start + n; + alloc_blocks += req_blocks; + RELEASE_SM_LOCK; + return bd->start; + + /* small allocation (rCurrentAlloc; + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + + // The CurrentAlloc block is full, we need to find another + // one. First, we try taking the next block from the + // nursery: + bd = reg->rCurrentNursery->link; + + if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { + // The nursery is empty, or the next block is already + // full: allocate a fresh block (we can't fail here). + ACQUIRE_SM_LOCK; + bd = allocBlock(); + alloc_blocks++; + RELEASE_SM_LOCK; + bd->gen_no = 0; + bd->step = g0s0; + bd->flags = 0; + } else { + // we have a block in the nursery: take it and put + // it at the *front* of the nursery list, and use it + // to allocate() from. + reg->rCurrentNursery->link = bd->link; + } + bd->link = reg->rNursery->blocks; + reg->rNursery->blocks = bd; + bd->u.back = NULL; + reg->rCurrentAlloc = bd; + } + } + p = bd->free; + bd->free += n; + return p; +} + /* --------------------------------------------------------------------------- Allocate a fixed/pinned object. @@ -690,7 +771,11 @@ stgAllocForGMP (size_t size_in_bytes) total_size_in_words = sizeofW(StgArrWords) + data_size_in_words; /* allocate and fill it in. */ - arr = (StgArrWords *)allocate(total_size_in_words); +#if defined(SMP) + arr = (StgArrWords *)allocateLocal(&(myCapability()->r), total_size_in_words); +#else + arr = (StgArrWords *)allocateLocal(&MainCapability.r, total_size_in_words); +#endif SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words); /* and return a ptr to the goods inside the array */ @@ -740,9 +825,7 @@ calcAllocated( void ) nat i; allocated = allocated_bytes(); - for (i = 0; i < n_nurseries; i++) { - allocated += nurseries[i].n_blocks * BLOCK_SIZE_W; - } + allocated += countNurseryBlocks() * BLOCK_SIZE_W; #ifdef SMP for (i = 0; i < n_nurseries; i++) {
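
For readers skimming the Storage.c hunks, the allocateLocal() control flow is
roughly: bump-allocate from the Capability's rCurrentAlloc block; when that
block fills up, take the next block from the thread's own nursery without
locking; only when the nursery has nothing left acquire sm_mutex and ask the
block allocator for a fresh block. The standalone C sketch below models that
decision logic under simplifying assumptions: Block, Cap and freshBlock() are
invented stand-ins for bdescr/Capability/allocBlock(), and the large-object
path (LARGE_OBJECT_THRESHOLD) is omitted entirely.

    #include <stdio.h>
    #include <stdlib.h>

    #define BLOCK_WORDS 1024            /* words per block in this toy model */

    typedef struct Block {
        size_t words[BLOCK_WORDS];
        size_t free;                    /* index of next free word */
        struct Block *link;             /* next block in the nursery list */
    } Block;

    typedef struct {
        Block *currentAlloc;            /* plays the role of rCurrentAlloc */
        Block *nursery;                 /* spare nursery blocks */
    } Cap;

    /* Stands in for "ACQUIRE_SM_LOCK; allocBlock(); RELEASE_SM_LOCK". */
    static Block *freshBlock(void)
    {
        Block *b = calloc(1, sizeof *b);
        if (b == NULL) abort();
        return b;
    }

    /* Assumes n <= BLOCK_WORDS; the real code diverts large requests to
     * the large-object pool instead. */
    static size_t *allocateLocal(Cap *cap, size_t n)
    {
        Block *bd = cap->currentAlloc;
        if (bd == NULL || bd->free + n > BLOCK_WORDS) {
            if (cap->nursery != NULL) { /* lock-free: take our own nursery block */
                bd = cap->nursery;
                cap->nursery = bd->link;
            } else {
                bd = freshBlock();      /* only this path needs the SM lock */
            }
            cap->currentAlloc = bd;
        }
        size_t *p = &bd->words[bd->free];
        bd->free += n;
        return p;
    }

    int main(void)
    {
        Cap cap = { NULL, NULL };
        size_t *p = allocateLocal(&cap, 8);
        size_t *q = allocateLocal(&cap, 8);
        printf("two allocations, same block: %d\n", q == p + 8);
        return 0;
    }

Because only the fresh-block path touches sm_mutex, back-to-back small
allocations (the common case for the Integer primops) never contend on the
global lock.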
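
The MVar hunks above rely on the closure's info pointer doubling as a lock:
lockClosure() swaps in stg_WHITEHOLE_info with an atomic exchange and spins
until it reads back something else, and writing a real info pointer with
SET_INFO() releases the lock. The standalone C sketch below models that
protocol; it uses C11 atomics rather than the x86 "xchgl" inline assembly in
SMP.h, and the Closure type, the *_info sentinels and unlockClosure() are
invented stand-ins for illustration, not RTS definitions.

    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct {
        _Atomic(const void *) info;   /* info pointer, doubles as the lock word */
        int value;                    /* payload of our toy "MVar" */
    } Closure;

    /* Sentinels: only their addresses matter. */
    static const char WHITEHOLE_info  = 0;
    static const char FULL_MVAR_info  = 0;
    static const char EMPTY_MVAR_info = 0;

    /* Spin until we own the closure; returns the info pointer we displaced. */
    static const void *lockClosure(Closure *c)
    {
        for (;;) {
            const void *info = atomic_exchange(&c->info, &WHITEHOLE_info);
            if (info != (const void *)&WHITEHOLE_info)
                return info;
            /* the RTS version would yieldThread() here instead of busy-spinning */
        }
    }

    /* Unlock by publishing a (possibly new) info pointer, like SET_INFO(). */
    static void unlockClosure(Closure *c, const void *info)
    {
        atomic_store(&c->info, info);
    }

    int main(void)
    {
        Closure mvar = { &FULL_MVAR_info, 42 };

        const void *info = lockClosure(&mvar);
        if (info == (const void *)&FULL_MVAR_info) {
            int v = mvar.value;                       /* takeMVar succeeds */
            unlockClosure(&mvar, &EMPTY_MVAR_info);   /* now empty, and unlocked */
            printf("took %d\n", v);
        } else {
            unlockClosure(&mvar, info);               /* restore info; would block */
        }
        return 0;
    }

This is why several RELEASE_LOCK(sm_mutex) calls in PrimOps.cmm turn into
plain SET_INFO() writes: storing the final info pointer is itself the unlock.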