HP_CHK_GEN(alloc,liveness,reentry); \
TICK_ALLOC_HEAP_NOCTR(alloc);
+// allocateLocal() allocates from the nursery, so we check to see
+// whether the nursery is nearly empty in any function that uses
+// allocateLocal() - this includes many of the primops.
#define MAYBE_GC(liveness,reentry) \
- if (CInt[alloc_blocks] >= CInt[alloc_blocks_lim]) { \
+ if (bdescr_link(CurrentNursery) == NULL) { \
R9 = liveness; \
R10 = reentry; \
jump stg_gc_gen_hp; \
StgPtr rHpLim;
struct StgTSO_ *rCurrentTSO;
struct step_ *rNursery;
- struct bdescr_ *rCurrentNursery;
+ struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */
+ struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */
StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
#if defined(SMP) || defined(PAR)
StgSparkPool rSparks; /* per-task spark pool */
#include "Parallel.h"
/* STG/Optimised-C related stuff */
-#include "SMP.h"
#include "Block.h"
-#ifdef SMP
-#include <pthread.h>
-#endif
-
/* GNU mp library */
#include "gmp.h"
/* ----------------------------------------------------------------------------
*
- * (c) The GHC Team, 1999
+ * (c) The GHC Team, 2005
*
* Macros for SMP support
*
#error Build options incompatible with SMP.
#endif
-/*
- * CMPXCHG - this instruction is the standard "test & set". We use it
- * for locking closures in the thunk and blackhole entry code. If the
- * closure is already locked, or has an unexpected info pointer
- * (because another thread is altering it in parallel), we just jump
- * to the new entry point.
- */
-#if defined(i386_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE)
-#define CMPXCHG(p, cmp, new) \
- __asm__ __volatile__ ( \
- "lock ; cmpxchg %1, %0\n" \
- "\tje 1f\n" \
- "\tjmp *%%eax\n" \
- "\t1:\n" \
- : /* no outputs */ \
- : "m" (p), "r" (new), "r" (cmp) \
- )
-
/*
* XCHG - the atomic exchange instruction. Used for locking closures
* during updates (see LOCK_CLOSURE below) and the MVar primops.
*/
-#define XCHG(reg, obj) \
- __asm__ __volatile__ ( \
- "xchgl %1,%0" \
- :"+r" (reg), "+m" (obj) \
- : /* no input-only operands */ \
- )
-
+INLINE_HEADER StgWord
+xchg(StgPtr p, StgWord w) /* swaps w with *p using a single xchg instruction and returns the value *p held beforehand */
+{
+ StgWord result;
+ result = w; /* result is an in/out ("+r") operand: it carries w into the asm and the old *p back out */
+ __asm__ __volatile__ (
+ "xchgl %1,%0" /* NOTE(review): xchgl is the 32-bit form - assumes StgWord is 32 bits wide here; confirm before enabling on other targets */
+ :"+r" (result), "+m" (*p)
+ : /* no input-only operands */
+ );
+ return result;
+}
+
+/*
+ * lockClosure - obtain the info pointer of closure p.
+ *
+ * The spinning implementation (swap stg_WHITEHOLE_info into the header
+ * with xchg() and retry, yielding, while another thread holds the
+ * closure) is currently compiled out with "#if 0".  In the live branch
+ * the header is only read, so no lock is actually taken.
+ *
+ * Returns the info pointer that was found in p's header.
+ */
+INLINE_HEADER StgInfoTable *
+lockClosure(StgClosure *p)
+{
+ StgWord info;
+#if 0
+ do {
+ info = xchg((P_)&p->header.info, (W_)&stg_WHITEHOLE_info);
+ if (info != (W_)&stg_WHITEHOLE_info) return (StgInfoTable *)info;
+ yieldThread();
+ } while (1);
#else
-#error SMP macros not defined for this architecture
+ info = (W_)p->header.info;
#endif
-
-/*
- * LOCK_CLOSURE locks the specified closure, busy waiting for any
- * existing locks to be cleared.
- */
-#define LOCK_CLOSURE(c) \
- ({ \
- const StgInfoTable *__info; \
- __info = &stg_WHITEHOLE_info; \
- do { \
- XCHG(__info,((StgClosure *)(c))->header.info); \
- } while (__info == &stg_WHITEHOLE_info); \
- __info; \
- })
-
-#define LOCK_THUNK(__info) \
- CMPXCHG(R1.cl->header.info, __info, &stg_WHITEHOLE_info);
-
-#else /* !SMP */
-
-#define LOCK_CLOSURE(c) /* nothing */
-#define LOCK_THUNK(__info) /* nothing */
+ /* The live branch above used to fall off the end of a non-void
+ * function; callers use the result, so it must be returned. */
+ return (StgInfoTable *)info;
+}
#endif /* SMP */
RTS_INFO(stg_IND_OLDGEN_PERM_info);
RTS_INFO(stg_CAF_UNENTERED_info);
RTS_INFO(stg_CAF_ENTERED_info);
+RTS_INFO(stg_WHITEHOLE_info);
RTS_INFO(stg_BLACKHOLE_info);
RTS_INFO(stg_CAF_BLACKHOLE_info);
#ifdef TICKY_TICKY
RTS_ENTRY(stg_IND_OLDGEN_PERM_entry);
RTS_ENTRY(stg_CAF_UNENTERED_entry);
RTS_ENTRY(stg_CAF_ENTERED_entry);
+RTS_ENTRY(stg_WHITEHOLE_entry);
RTS_ENTRY(stg_BLACKHOLE_entry);
RTS_ENTRY(stg_CAF_BLACKHOLE_entry);
#ifdef TICKY_TICKY
-------------------------------------------------------------------------- */
extern StgPtr allocate ( nat n );
+extern StgPtr allocateLocal ( StgRegTable *reg, nat n );
extern StgPtr allocatePinned ( nat n );
extern lnat allocated_bytes ( void );
*/
#if defined(SMP)
extern Mutex sm_mutex;
+#endif
+
+#if defined(SMP)
#define ACQUIRE_SM_LOCK ACQUIRE_LOCK(&sm_mutex);
#define RELEASE_SM_LOCK RELEASE_LOCK(&sm_mutex);
#else
#include "OSThreads.h"
#include "Capability.h"
#include "Schedule.h" /* to get at EMPTY_RUN_QUEUE() */
+#if defined(SMP)
+#include "Hash.h"
+#endif
#if !defined(SMP)
Capability MainCapability; /* for non-SMP, we have one global capability */
* Free capability list.
*/
Capability *free_capabilities;
+
+/*
+ * Maps OSThreadId to Capability *
+ */
+HashTable *capability_hash;
#endif
#ifdef SMP
free_capabilities = &capabilities[0];
rts_n_free_capabilities = n;
+ capability_hash = allocHashTable();
+
IF_DEBUG(scheduler, sched_belch("allocated %d capabilities", n));
#else
capabilities = &MainCapability;
*cap = free_capabilities;
free_capabilities = (*cap)->link;
rts_n_free_capabilities--;
+ insertHashTable(capability_hash, osThreadId(), *cap);
#else
# if defined(RTS_SUPPORTS_THREADS)
ASSERT(rts_n_free_capabilities == 1);
}
/* ----------------------------------------------------------------------------
+ * Function: myCapability(void)
+ *
+ * Purpose: Return the capability owned by the current thread.
+ * Should not be used if the current thread does not
+ * hold a Capability.
+ * ------------------------------------------------------------------------- */
+Capability *
+myCapability (void)
+{
+    Capability *mine;
+
+#if defined(SMP)
+    /* SMP: look the calling OS thread up in the thread-id -> Capability map. */
+    mine = lookupHashTable(capability_hash, osThreadId());
+#else
+    /* Non-SMP: there is only the single global capability. */
+    mine = &MainCapability;
+#endif
+    return mine;
+}
+
+/* ----------------------------------------------------------------------------
* Function: releaseCapability(Capability*)
*
* Purpose: Letting go of a capability. Causes a
#if defined(SMP)
cap->link = free_capabilities;
free_capabilities = cap;
+ ASSERT(myCapability() == cap);
+ removeHashTable(capability_hash, osThreadId(), NULL);
#endif
// Check to see whether a worker thread can be given
// the go-ahead to return the result of an external call..
//
extern void threadRunnable ( void );
+// Return the capability that I own.
+//
+extern Capability *myCapability (void);
+
extern void prodWorker ( void );
#ifdef RTS_SUPPORTS_THREADS
if (stp->is_compacted) {
collected += (oldgen_saved_blocks - stp->n_blocks) * BLOCK_SIZE_W;
} else {
- collected += stp->n_blocks * BLOCK_SIZE_W;
+ if (g == 0 && s == 0) {
+ collected += countNurseryBlocks() * BLOCK_SIZE_W;
+ collected += alloc_blocks;
+ } else {
+ collected += stp->n_blocks * BLOCK_SIZE_W;
+ }
}
/* free old memory and shift to-space into from-space for all
-\#include LdvProfile.h \
-\#include Profiling.h \
-\#include OSThreads.h \
- -\#include Apply.h
+ -\#include Apply.h \
+ -\#include SMP.h
ifeq "$(Windows)" "YES"
PrimOps_HC_OPTS += -\#include '<windows.h>' -\#include win32/AsyncIO.h
n = R1;
payload_words = ROUNDUP_BYTES_TO_WDS(n);
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
- "ptr" p = foreign "C" allocate(words);
+ "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
StgArrWords_words(p) = payload_words;
MAYBE_GC(R2_PTR,newArrayzh_fast);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
- "ptr" arr = foreign "C" allocate(words);
+ "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
{
W_ mvar, val, info, tso;
-#if defined(SMP)
- foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
-#endif
-
/* args: R1 = MVar closure */
mvar = R1;
+#if defined(SMP)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr");
+#else
info = GET_INFO(mvar);
+#endif
/* If the MVar is empty, put ourselves on its blocking queue,
* and wait until we're woken up.
StgMVar_tail(mvar) = CurrentTSO;
#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
jump stg_block_takemvar;
}
#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
+ SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
RET_P(val);
else
{
/* No further putMVars, MVar is now empty */
-
- /* do this last... we might have locked the MVar in the SMP case,
- * and writing the info pointer will unlock it.
- */
- SET_INFO(mvar,stg_EMPTY_MVAR_info);
StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
-
-#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
-#endif
+
+ /* unlocks the closure in the SMP case */
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
RET_P(val);
}
{
W_ mvar, val, info, tso;
-#if defined(SMP)
- foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
-#endif
-
/* args: R1 = MVar closure */
mvar = R1;
+#if defined(SMP)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr");
+#else
info = GET_INFO(mvar);
+#endif
if (info == stg_EMPTY_MVAR_info) {
+#if defined(SMP)
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
+#endif
/* HACK: we need a pointer to pass back,
* so we abuse NO_FINALIZER_closure
*/
-#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
-#endif
RET_NP(0, stg_NO_FINALIZER_closure);
}
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
}
+#if defined(SMP)
+ SET_INFO(mvar,stg_FULL_MVAR_info);
+#endif
}
else
{
SET_INFO(mvar,stg_EMPTY_MVAR_info);
}
-#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
-#endif
-
RET_NP(1, val);
}
{
W_ mvar, info, tso;
-#if defined(SMP)
- foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
-#endif
-
/* args: R1 = MVar, R2 = value */
mvar = R1;
+#if defined(SMP)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr");
+#else
info = GET_INFO(mvar);
+#endif
if (info == stg_FULL_MVAR_info) {
if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
StgMVar_tail(mvar) = CurrentTSO;
#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
+ SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
jump stg_block_putmvar;
}
}
#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
jump %ENTRY_CODE(Sp(0));
}
/* unlocks the MVar in the SMP case */
SET_INFO(mvar,stg_FULL_MVAR_info);
-#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
-#endif
jump %ENTRY_CODE(Sp(0));
}
{
W_ mvar, info, tso;
-#if defined(SMP)
- foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
-#endif
-
/* args: R1 = MVar, R2 = value */
mvar = R1;
+#if defined(SMP)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr");
+#else
info = GET_INFO(mvar);
+#endif
if (info == stg_FULL_MVAR_info) {
#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
+ SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
RET_N(0);
}
}
#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
jump %ENTRY_CODE(Sp(0));
}
StgMVar_value(mvar) = R2;
/* unlocks the MVar in the SMP case */
SET_INFO(mvar,stg_FULL_MVAR_info);
-#if defined(SMP)
- foreign "C" RELEASE_LOCK(sm_mutex "ptr");
-#endif
+
jump %ENTRY_CODE(Sp(0));
}
#endif
/* ----------------------------------------------------------------------------
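+ WHITEHOLE: the info table whose address lockClosure() is meant to
+ store in a closure's header while that closure is locked (SMP).
+ A WHITEHOLE must never be entered.
+ ------------------------------------------------------------------------- */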
+
+INFO_TABLE(stg_WHITEHOLE, 0,0, INVALID_OBJECT, "WHITEHOLE", "WHITEHOLE") /* declared with closure type INVALID_OBJECT */
+{ foreign "C" barf("WHITEHOLE object entered!"); } /* the entry code does nothing except report the error through barf() */
+
+/* ----------------------------------------------------------------------------
Some static info tables for things that don't get entered, and
therefore don't need entry code (i.e. boxed but unpointed objects)
NON_ENTERABLE_ENTRY_CODE now defined at the beginning of the file
#include <stdlib.h>
#include <string.h>
+/*
+ * All these globals require sm_mutex to access in SMP mode.
+ */
StgClosure *caf_list = NULL;
StgClosure *revertible_caf_list = NULL;
rtsBool keepCAFs;
for (i = 0; i < n_nurseries; i++) {
capabilities[i].r.rNursery = &nurseries[i];
capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
+ capabilities[i].r.rCurrentAlloc = NULL;
}
#else /* SMP */
MainCapability.r.rNursery = &nurseries[0];
MainCapability.r.rCurrentNursery = nurseries[0].blocks;
+ MainCapability.r.rCurrentAlloc = NULL;
#endif
}
StgPtr
allocate( nat n )
{
- bdescr *bd;
- StgPtr p;
+ bdescr *bd;
+ StgPtr p;
- ACQUIRE_SM_LOCK;
+ ACQUIRE_SM_LOCK; /* allocate() hands out n words; the lock is held until the RELEASE_SM_LOCK before each return */
- TICK_ALLOC_HEAP_NOCTR(n);
- CCS_ALLOC(CCCS,n);
-
- /* big allocation (>LARGE_OBJECT_THRESHOLD) */
- /* ToDo: allocate directly into generation 1 */
- if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
- nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
- bd = allocGroup(req_blocks);
- dbl_link_onto(bd, &g0s0->large_objects);
- g0s0->n_large_blocks += req_blocks;
- bd->gen_no = 0;
- bd->step = g0s0;
- bd->flags = BF_LARGE;
- bd->free = bd->start + n;
- alloc_blocks += req_blocks;
- RELEASE_SM_LOCK;
- return bd->start;
+ TICK_ALLOC_HEAP_NOCTR(n);
+ CCS_ALLOC(CCCS,n);
- /* small allocation (<LARGE_OBJECT_THRESHOLD) */
- } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
- if (small_alloc_list) {
- small_alloc_list->free = alloc_Hp;
+ /* big allocation (>LARGE_OBJECT_THRESHOLD) */
+ /* ToDo: allocate directly into generation 1 */
+ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { /* n counts words; the threshold is divided by sizeof(W_) to compare like with like */
+ nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+ bd = allocGroup(req_blocks);
+ dbl_link_onto(bd, &g0s0->large_objects);
+ g0s0->n_large_blocks += req_blocks;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = BF_LARGE;
+ bd->free = bd->start + n; /* the object sits at the start of its own block group */
+ alloc_blocks += req_blocks;
+ RELEASE_SM_LOCK;
+ return bd->start;
+
+ /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+ } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) { /* no current small-object block, or n words do not fit in what is left of it */
+ if (small_alloc_list) {
+ small_alloc_list->free = alloc_Hp; /* record how far the block being retired was filled */
+ }
+ bd = allocBlock();
+ bd->link = small_alloc_list;
+ small_alloc_list = bd;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = 0;
+ alloc_Hp = bd->start;
+ alloc_HpLim = bd->start + BLOCK_SIZE_W;
+ alloc_blocks++;
 }
- bd = allocBlock();
- bd->link = small_alloc_list;
- small_alloc_list = bd;
- bd->gen_no = 0;
- bd->step = g0s0;
- bd->flags = 0;
- alloc_Hp = bd->start;
- alloc_HpLim = bd->start + BLOCK_SIZE_W;
- alloc_blocks++;
- }
-
- p = alloc_Hp;
- alloc_Hp += n;
- RELEASE_SM_LOCK;
- return p;
+
+ p = alloc_Hp; /* bump-allocate n words from the current small-object block */
+ alloc_Hp += n;
+ RELEASE_SM_LOCK;
+ return p;
}
lnat
}
}
+/* -----------------------------------------------------------------------------
+ allocateLocal()
+
+ This allocates memory in the current thread - it is intended for
+ use primarily from STG-land where we have a Capability. It is
+ better than allocate() because it doesn't require taking the
+ sm_mutex lock in the common case.
+
+ Memory is allocated directly from the nursery if possible (but not
+ from the current nursery block, so as not to interfere with
+ Hp/HpLim).
+ -------------------------------------------------------------------------- */
+
+StgPtr
+allocateLocal( StgRegTable *reg, nat n )
+{
+    /*
+     * reg: register table of the calling capability; its rCurrentAlloc,
+     *      rCurrentNursery and rNursery fields are read and updated.
+     * n:   size of the request in words.
+     * Returns a pointer to the first of n freshly reserved words.
+     *
+     * sm_mutex is taken only for large objects and when a brand-new
+     * block has to be fetched from the block allocator.
+     */
+    bdescr *bd;
+    StgPtr p;
+
+    TICK_ALLOC_HEAP_NOCTR(n);
+    CCS_ALLOC(CCCS,n);
+
+    /* big allocation (>LARGE_OBJECT_THRESHOLD) */
+    /* ToDo: allocate directly into generation 1 */
+    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+        nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+        ACQUIRE_SM_LOCK;
+        bd = allocGroup(req_blocks);
+        dbl_link_onto(bd, &g0s0->large_objects);
+        g0s0->n_large_blocks += req_blocks;
+        bd->gen_no = 0;
+        bd->step = g0s0;
+        bd->flags = BF_LARGE;
+        bd->free = bd->start + n;
+        alloc_blocks += req_blocks;
+        RELEASE_SM_LOCK;
+        return bd->start;
+    }
+
+    /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+    bd = reg->rCurrentAlloc;
+    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+
+        // The CurrentAlloc block is full, we need to find another
+        // one.  First, we try taking the next block from the
+        // nursery:
+        bd = reg->rCurrentNursery->link;
+
+        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+            // The nursery is empty, or the next block is already
+            // full: allocate a fresh block (we can't fail here).
+            ACQUIRE_SM_LOCK;
+            bd = allocBlock();
+            alloc_blocks++;
+            RELEASE_SM_LOCK;
+            bd->gen_no = 0;
+            bd->step = g0s0;
+            bd->flags = 0;
+        } else {
+            // We have a block in the nursery: unlink it from its
+            // current position.  The list carries back pointers
+            // (u.back), so the successor must be re-pointed at
+            // rCurrentNursery or it keeps referring to the block we
+            // have just removed.
+            reg->rCurrentNursery->link = bd->link;
+            if (bd->link != NULL) {
+                bd->link->u.back = reg->rCurrentNursery;
+            }
+        }
+
+        // Put the block at the *front* of the nursery list and use it
+        // to allocate from.  dbl_link_onto() also fixes up the old
+        // head's back pointer, which a hand-rolled push would miss.
+        dbl_link_onto(bd, &reg->rNursery->blocks);
+        reg->rCurrentAlloc = bd;
+    }
+
+    p = bd->free;
+    bd->free += n;
+    return p;
+}
+
/* ---------------------------------------------------------------------------
Allocate a fixed/pinned object.
total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
/* allocate and fill it in. */
- arr = (StgArrWords *)allocate(total_size_in_words);
+#if defined(SMP)
+ arr = (StgArrWords *)allocateLocal(&(myCapability()->r), total_size_in_words);
+#else
+ arr = (StgArrWords *)allocateLocal(&MainCapability.r, total_size_in_words);
+#endif
SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
/* and return a ptr to the goods inside the array */
nat i;
allocated = allocated_bytes();
- for (i = 0; i < n_nurseries; i++) {
- allocated += nurseries[i].n_blocks * BLOCK_SIZE_W;
- }
+ allocated += countNurseryBlocks() * BLOCK_SIZE_W;
#ifdef SMP
for (i = 0; i < n_nurseries; i++) {