#include "Storage.h"
#include "Schedule.h"
#include "RetainerProfile.h" // for counting memory blocks (memInventory)
-#include "StoragePriv.h"
#include <stdlib.h>
#include <string.h>
+/*
+ * All these globals require sm_mutex to access in THREADED_RTS mode.
+ */
StgClosure *caf_list = NULL;
StgClosure *revertible_caf_list = NULL;
rtsBool keepCAFs;
ullong total_allocated = 0; /* total memory allocated during run */
nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
-step *nurseries = NULL; /* array of nurseries, >1 only if SMP */
+step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */
+#ifdef THREADED_RTS
/*
* Storage manager mutex: protects all the above state from
* simultaneous access by two STG threads.
*/
-#ifdef SMP
-Mutex sm_mutex = INIT_MUTEX_VAR;
+Mutex sm_mutex; /* no static initialiser any more: initMutex(&sm_mutex) is called during storage initialisation */
+/*
+ * This mutex is used by atomicModifyMutVar# only
+ */
+Mutex atomic_modify_mutvar_mutex; /* set up with initMutex() right after sm_mutex */
#endif
+
/*
* Forward references
*/
{
stp->no = s;
stp->blocks = NULL;
- stp->n_to_blocks = 0;
stp->n_blocks = 0;
+ stp->old_blocks = NULL;
+ stp->n_old_blocks = 0;
stp->gen = &generations[g];
stp->gen_no = g;
stp->hp = NULL;
stp->hpLim = NULL;
stp->hp_bd = NULL;
+ stp->scavd_hp = NULL;
+ stp->scavd_hpLim = NULL;
stp->scan = NULL;
stp->scan_bd = NULL;
stp->large_objects = NULL;
initBlockAllocator();
-#if defined(SMP)
+#if defined(THREADED_RTS)
initMutex(&sm_mutex);
+ initMutex(&atomic_modify_mutvar_mutex);
#endif
+ ACQUIRE_SM_LOCK;
+
/* allocate generation info array */
generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
* sizeof(struct generation_),
g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
}
-#ifdef SMP
- n_nurseries = RtsFlags.ParFlags.nNodes;
+#ifdef THREADED_RTS
+ n_nurseries = n_capabilities;
nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
"initStorage: nurseries");
#else
}
}
-#ifdef SMP
+#ifdef THREADED_RTS
for (s = 0; s < n_nurseries; s++) {
initStep(&nurseries[s], 0, s);
}
}
oldest_gen->steps[0].to = &oldest_gen->steps[0];
-#ifdef SMP
+#ifdef THREADED_RTS
for (s = 0; s < n_nurseries; s++) {
nurseries[s].to = generations[0].steps[0].to;
}
}
}
-#ifdef SMP
+#ifdef THREADED_RTS
if (RtsFlags.GcFlags.generations == 1) {
- errorBelch("-G1 is incompatible with SMP");
- stg_exit(1);
- }
- // No -H, for now
- if (RtsFlags.GcFlags.heapSizeSuggestion > 0) {
- errorBelch("-H<size> is incompatible with SMP");
- stg_exit(1);
+ errorBelch("-G1 is incompatible with -threaded");
+ stg_exit(EXIT_FAILURE);
}
#endif
mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
IF_DEBUG(gc, statDescribeGens());
+
+ RELEASE_SM_LOCK;
}
void
static void
assignNurseriesToCapabilities (void)
{
-#ifdef SMP
+#ifdef THREADED_RTS
nat i;
for (i = 0; i < n_nurseries; i++) {
capabilities[i].r.rNursery = &nurseries[i];
capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
+ capabilities[i].r.rCurrentAlloc = NULL; /* no allocateLocal() block yet: allocateLocal() picks one when it finds NULL here */
}
-#else /* SMP */
+#else /* THREADED_RTS */
MainCapability.r.rNursery = &nurseries[0];
MainCapability.r.rCurrentNursery = nurseries[0].blocks;
+ MainCapability.r.rCurrentAlloc = NULL; /* same reset for the single capability of the non-threaded build */
#endif
}
allocNursery(&nurseries[i], NULL,
RtsFlags.GcFlags.minAllocAreaSize);
nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
- nurseries[i].to_blocks = NULL;
- nurseries[i].n_to_blocks = 0;
+ nurseries[i].old_blocks = NULL;
+ nurseries[i].n_old_blocks = 0;
/* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
}
assignNurseriesToCapabilities();
// Resize each of the nurseries to the specified size.
//
void
-resizeNurseries (nat blocks)
+resizeNurseriesFixed (nat blocks)
{
nat i;
for (i = 0; i < n_nurseries; i++) {
}
}
+//
+// Resize the nurseries so that, taken together, they hold the given
+// total number of blocks.
+//
+void
+resizeNurseries (nat blocks)
+{
+    // The total is shared out evenly: every nursery is resized to the
+    // same (integer-division) share of the available blocks.
+    nat blocks_per_nursery = blocks / n_nurseries;
+
+    resizeNurseriesFixed(blocks_per_nursery);
+}
+
/* -----------------------------------------------------------------------------
The allocate() interface
StgPtr
allocate( nat n )
{
- bdescr *bd;
- StgPtr p;
+ bdescr *bd;
+ StgPtr p;
- ACQUIRE_SM_LOCK;
+ ACQUIRE_SM_LOCK; /* held for the whole body; released just before each return */
- TICK_ALLOC_HEAP_NOCTR(n);
- CCS_ALLOC(CCCS,n);
-
- /* big allocation (>LARGE_OBJECT_THRESHOLD) */
- /* ToDo: allocate directly into generation 1 */
- if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
- nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
- bd = allocGroup(req_blocks);
- dbl_link_onto(bd, &g0s0->large_objects);
- g0s0->n_large_blocks += req_blocks;
- bd->gen_no = 0;
- bd->step = g0s0;
- bd->flags = BF_LARGE;
- bd->free = bd->start + n;
- alloc_blocks += req_blocks;
- RELEASE_SM_LOCK;
- return bd->start;
+ TICK_ALLOC_HEAP_NOCTR(n);
+ CCS_ALLOC(CCCS,n);
- /* small allocation (<LARGE_OBJECT_THRESHOLD) */
- } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
- if (small_alloc_list) {
- small_alloc_list->free = alloc_Hp;
+ /* big allocation: n is counted in words, so it is compared (with >=) against LARGE_OBJECT_THRESHOLD/sizeof(W_) */
+ /* ToDo: allocate directly into generation 1 */
+ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+ nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; /* whole blocks needed to hold n words */
+ bd = allocGroup(req_blocks);
+ dbl_link_onto(bd, &g0s0->large_objects); /* the group is tracked on g0s0's large-object list */
+ g0s0->n_large_blocks += req_blocks;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = BF_LARGE;
+ bd->free = bd->start + n; /* the object occupies the first n words of the group */
+ alloc_blocks += req_blocks;
+ RELEASE_SM_LOCK;
+ return bd->start;
+
+ /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+ } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) { /* no current small block, or it cannot take n more words */
+ if (small_alloc_list) {
+ small_alloc_list->free = alloc_Hp; /* record how far the outgoing block was filled */
+ }
+ bd = allocBlock();
+ bd->link = small_alloc_list; /* the fresh block goes on the front of small_alloc_list */
+ small_alloc_list = bd;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = 0;
+ alloc_Hp = bd->start; /* alloc_Hp/alloc_HpLim now delimit the fresh block */
+ alloc_HpLim = bd->start + BLOCK_SIZE_W;
+ alloc_blocks++;
}
- bd = allocBlock();
- bd->link = small_alloc_list;
- small_alloc_list = bd;
- bd->gen_no = 0;
- bd->step = g0s0;
- bd->flags = 0;
- alloc_Hp = bd->start;
- alloc_HpLim = bd->start + BLOCK_SIZE_W;
- alloc_blocks++;
- }
-
- p = alloc_Hp;
- alloc_Hp += n;
- RELEASE_SM_LOCK;
- return p;
+
+ p = alloc_Hp; /* hand out the next n words of the current small block */
+ alloc_Hp += n;
+ RELEASE_SM_LOCK;
+ return p;
}
lnat
}
}
+/* -----------------------------------------------------------------------------
+   allocateLocal()
+
+   Per-Capability allocation of n words, meant mainly for callers in
+   STG-land that already have a Capability.  It is preferable to
+   allocate() because the common case does not take the sm_mutex
+   lock: the lock is only acquired for large objects and when a fresh
+   block has to be obtained.
+
+   Small objects are allocated directly from the nursery if possible
+   (but not from the current nursery block, so as not to interfere
+   with Hp/HpLim).
+   -------------------------------------------------------------------------- */
+
+StgPtr
+allocateLocal (Capability *cap, nat n)
+{
+    bdescr *bd;
+    StgPtr obj;
+
+    TICK_ALLOC_HEAP_NOCTR(n);
+    CCS_ALLOC(CCCS,n);
+
+    /* big allocation (>LARGE_OBJECT_THRESHOLD): handled entirely under
+     * the lock, then we are done. */
+    /* ToDo: allocate directly into generation 1 */
+    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+        nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+
+        ACQUIRE_SM_LOCK;
+        bd = allocGroup(req_blocks);
+        dbl_link_onto(bd, &g0s0->large_objects);
+        g0s0->n_large_blocks += req_blocks;
+        bd->gen_no = 0;
+        bd->step = g0s0;
+        bd->flags = BF_LARGE;
+        bd->free = bd->start + n;
+        alloc_blocks += req_blocks;
+        RELEASE_SM_LOCK;
+        return bd->start;
+    }
+
+    /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+    bd = cap->r.rCurrentAlloc;
+    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+
+        // There is no CurrentAlloc block, or it cannot hold the
+        // request.  The first candidate replacement is the block
+        // that follows the current nursery block.
+        bd = cap->r.rCurrentNursery->link;
+
+        if (bd != NULL && bd->free + n <= bd->start + BLOCK_SIZE_W) {
+            // That block exists and has room: unlink it from its
+            // place behind the current nursery block.  It is put
+            // at the *front* of the nursery list below.
+            cap->r.rCurrentNursery->link = bd->link;
+            if (bd->link != NULL) {
+                bd->link->u.back = cap->r.rCurrentNursery;
+            }
+        } else {
+            // The nursery has no next block, or the next block is
+            // too full: allocate a fresh block (we can't fail here).
+            ACQUIRE_SM_LOCK;
+            bd = allocBlock();
+            cap->r.rNursery->n_blocks++;
+            RELEASE_SM_LOCK;
+            bd->gen_no = 0;
+            bd->step = cap->r.rNursery;
+            bd->flags = 0;
+        }
+
+        // Either way the chosen block becomes the head of the nursery
+        // list and the new CurrentAlloc block.
+        dbl_link_onto(bd, &cap->r.rNursery->blocks);
+        cap->r.rCurrentAlloc = bd;
+        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
+    }
+
+    // Bump-allocate the n words out of the chosen block.
+    obj = bd->free;
+    bd->free += n;
+    return obj;
+}
+
/* ---------------------------------------------------------------------------
Allocate a fixed/pinned object.
}
/* -----------------------------------------------------------------------------
+ This is the write barrier for MUT_VARs, a.k.a. IORefs. A
+ MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
+ is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
+ and is put on the mutable list.
+ -------------------------------------------------------------------------- */
+
+void
+dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
+{
+    Capability *cap = regTableToCapability(reg);
+    bdescr *bd;
+
+    // Only a MUT_VAR_CLEAN needs any work; anything else is left alone.
+    if (p->header.info != &stg_MUT_VAR_CLEAN_info) {
+        return;
+    }
+
+    // First write since the object was clean: mark it dirty ...
+    p->header.info = &stg_MUT_VAR_DIRTY_info;
+
+    // ... and record it on this capability's mutable list, unless the
+    // block it lives in belongs to generation 0.
+    bd = Bdescr((StgPtr)p);
+    if (bd->gen_no > 0) {
+        recordMutableCap(p,cap,bd->gen_no);
+    }
+}
+
+/* -----------------------------------------------------------------------------
Allocation functions for GMP.
These all use the allocate() interface - we can't have any garbage
total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
/* allocate and fill it in. */
- arr = (StgArrWords *)allocate(total_size_in_words);
+#if defined(THREADED_RTS)
+ arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
+#else
+ arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
+#endif
SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
/* and return a ptr to the goods inside the array */
{
nat allocated;
bdescr *bd;
- nat i;
allocated = allocated_bytes();
- for (i = 0; i < n_nurseries; i++) {
- allocated += nurseries[i].n_blocks * BLOCK_SIZE_W;
- }
+ allocated += countNurseryBlocks() * BLOCK_SIZE_W;
-#ifdef SMP
+ {
+#ifdef THREADED_RTS
+ nat i;
for (i = 0; i < n_nurseries; i++) {
Capability *cap;
- for ( bd = capabilities[i].r.rCurrentNursery;
+ for ( bd = capabilities[i].r.rCurrentNursery->link;
bd != NULL; bd = bd->link ) {
allocated -= BLOCK_SIZE_W;
}
- current_nursery->free;
}
#endif
+ }
total_allocated += allocated;
return allocated;
step *stp;
if (RtsFlags.GcFlags.generations == 1) {
- live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
+ live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
return live;
}
live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
/ sizeof(W_);
}
+ if (stp->scavd_hp != NULL) {
+ live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
+ }
}
}
return live;
bdescr *bd;
total_blocks = stp->n_blocks;
+ total_blocks += stp->n_old_blocks;
for (bd = stp->large_objects; bd; bd = bd->link) {
total_blocks += bd->blocks;
/* hack for megablock groups: they have an extra block or two in
/* count the blocks we current have */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (i = 0; i < n_capabilities; i++) {
+ for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
+ total_blocks += bd->blocks;
+ }
+ }
for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
total_blocks += bd->blocks;
}
for (i = 0; i < n_nurseries; i++) {
total_blocks += stepBlocks(&nurseries[i]);
}
-
- if (RtsFlags.GcFlags.generations == 1) {
- /* two-space collector has a to-space too :-) */
- total_blocks += g0s0->n_to_blocks;
- }
+#ifdef THREADED_RTS
+ // We put pinned object blocks in g0s0, so better count blocks there too.
+ total_blocks += stepBlocks(g0s0);
+#endif
/* any blocks held by allocate() */
for (bd = small_alloc_list; bd; bd = bd->link) {
nat g, s;
if (RtsFlags.GcFlags.generations == 1) {
- checkHeap(g0s0->to_blocks);
+ checkHeap(g0s0->blocks);
checkChain(g0s0->large_objects);
} else {
}
for (s = 0; s < n_nurseries; s++) {
- ASSERT(countBlocks(generations[g].steps[s].blocks)
- == generations[g].steps[s].n_blocks);
- ASSERT(countBlocks(generations[g].steps[s].large_objects)
- == generations[g].steps[s].n_large_blocks);
+ ASSERT(countBlocks(nurseries[s].blocks)
+ == nurseries[s].n_blocks);
+ ASSERT(countBlocks(nurseries[s].large_objects)
+ == nurseries[s].n_large_blocks);
}
checkFreeListSanity();
}
}
+/* Nursery sanity check: walk stp->blocks, asserting that every block's
+ * back pointer names its predecessor and that the summed block counts
+ * equal stp->n_blocks. */
+void
+checkNurserySanity( step *stp )
+{
+    bdescr *cur = stp->blocks;
+    bdescr *expected_back = NULL;
+    nat total = 0;
+
+    while (cur != NULL) {
+        ASSERT(cur->u.back == expected_back);
+        total += cur->blocks;
+        expected_back = cur;
+        cur = cur->link;
+    }
+
+    ASSERT(total == stp->n_blocks);
+}
+
// handy function for use in gdb, because Bdescr() is inlined.
extern bdescr *_bdescr( StgPtr p );