X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FStorage.c;h=4933854049a353ff5abd43b8fc7b0230f380d730;hb=2322bc9a89a9d8a6132a6818ccff6f665d7ed7f1;hp=b2878c2ce14f866936cb4c218eee5fe4d1fb083b;hpb=ec0984a97cec59d0403ae1f23e23f100666e8148;p=ghc-hetmet.git

diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c
index b2878c2..4933854 100644
--- a/ghc/rts/Storage.c
+++ b/ghc/rts/Storage.c
@@ -22,11 +22,13 @@
 #include "Storage.h"
 #include "Schedule.h"
 #include "RetainerProfile.h"	// for counting memory blocks (memInventory)
-#include "StoragePriv.h"
 
 #include <stdlib.h>
 #include <string.h>
 
+/*
+ * All these globals require sm_mutex to access in THREADED_RTS mode.
+ */
 StgClosure    *caf_list         = NULL;
 StgClosure    *revertible_caf_list = NULL;
 rtsBool       keepCAFs;
@@ -47,16 +49,21 @@ step *g0s0 = NULL; /* generation 0, step 0, for convenience */
 ullong total_allocated = 0;	/* total memory allocated during run */
 
 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
-step *nurseries         = NULL; /* array of nurseries, >1 only if SMP */
+step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
 
+#ifdef THREADED_RTS
 /*
  * Storage manager mutex:  protects all the above state from
  * simultaneous access by two STG threads.
  */
-#ifdef SMP
-Mutex sm_mutex = INIT_MUTEX_VAR;
+Mutex sm_mutex;
+/*
+ * This mutex is used by atomicModifyMutVar# only
+ */
+Mutex atomic_modify_mutvar_mutex;
 #endif
 
+
 /*
  * Forward references
  */
@@ -69,13 +76,16 @@ initStep (step *stp, int g, int s)
 {
     stp->no = s;
     stp->blocks = NULL;
-    stp->n_to_blocks = 0;
     stp->n_blocks = 0;
+    stp->old_blocks = NULL;
+    stp->n_old_blocks = 0;
     stp->gen = &generations[g];
     stp->gen_no = g;
     stp->hp = NULL;
     stp->hpLim = NULL;
     stp->hp_bd = NULL;
+    stp->scavd_hp = NULL;
+    stp->scavd_hpLim = NULL;
     stp->scan = NULL;
     stp->scan_bd = NULL;
     stp->large_objects = NULL;
@@ -120,10 +130,13 @@ initStorage( void )
 
   initBlockAllocator();
 
-#if defined(SMP)
+#if defined(THREADED_RTS)
   initMutex(&sm_mutex);
+  initMutex(&atomic_modify_mutvar_mutex);
 #endif
 
+  ACQUIRE_SM_LOCK;
+
   /* allocate generation info array */
   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                              * sizeof(struct generation_),
@@ -166,8 +179,8 @@ initStorage( void )
       g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
   }
 
-#ifdef SMP
-  n_nurseries = RtsFlags.ParFlags.nNodes;
+#ifdef THREADED_RTS
+  n_nurseries = n_capabilities;
   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
                               "initStorage: nurseries");
 #else
@@ -182,7 +195,7 @@ initStorage( void )
       }
   }
 
-#ifdef SMP
+#ifdef THREADED_RTS
   for (s = 0; s < n_nurseries; s++) {
       initStep(&nurseries[s], 0, s);
   }
@@ -197,7 +210,7 @@ initStorage( void )
   }
   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 
-#ifdef SMP
+#ifdef THREADED_RTS
   for (s = 0; s < n_nurseries; s++) {
       nurseries[s].to = generations[0].steps[0].to;
   }
@@ -212,15 +225,10 @@ initStorage( void )
       }
   }
 
-#ifdef SMP
+#ifdef THREADED_RTS
   if (RtsFlags.GcFlags.generations == 1) {
-      errorBelch("-G1 is incompatible with SMP");
-      stg_exit(1);
-  }
-  // No -H, for now
-  if (RtsFlags.GcFlags.heapSizeSuggestion > 0) {
-      errorBelch("-H is incompatible with SMP");
-      stg_exit(1);
+      errorBelch("-G1 is incompatible with -threaded");
+      stg_exit(EXIT_FAILURE);
   }
 #endif
 
@@ -250,6 +258,8 @@ initStorage( void )
   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 
   IF_DEBUG(gc, statDescribeGens());
+
+  RELEASE_SM_LOCK;
 }
 
 void
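Aside (annotation, not part of the patch): the new comment above makes every global in this file sm_mutex-protected in THREADED_RTS builds, which is why initStorage now brackets its body with ACQUIRE_SM_LOCK/RELEASE_SM_LOCK (the macros expand to no-ops in a non-threaded build). A minimal standalone sketch of that discipline in portable C, using invented stand-in names:

    /* Sketch only: sm_mutex-style locking around shared storage-manager
     * state.  sm_mutex_sk and alloc_blocks_sk are stand-ins, not RTS names. */
    #include <pthread.h>

    static pthread_mutex_t sm_mutex_sk = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long alloc_blocks_sk = 0;     /* shared SM state */

    void storage_op_sk (void)
    {
        pthread_mutex_lock(&sm_mutex_sk);         /* ACQUIRE_SM_LOCK */
        alloc_blocks_sk++;                        /* touch shared state */
        pthread_mutex_unlock(&sm_mutex_sk);       /* RELEASE_SM_LOCK */
    }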
@@ -405,16 +415,18 @@ allocNursery (step *stp, bdescr *tail, nat blocks)
 static void
 assignNurseriesToCapabilities (void)
 {
-#ifdef SMP
+#ifdef THREADED_RTS
     nat i;
     for (i = 0; i < n_nurseries; i++) {
         capabilities[i].r.rNursery        = &nurseries[i];
         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
+        capabilities[i].r.rCurrentAlloc   = NULL;
     }
-#else /* SMP */
+#else /* THREADED_RTS */
     MainCapability.r.rNursery        = &nurseries[0];
     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
+    MainCapability.r.rCurrentAlloc   = NULL;
 #endif
 }
 
@@ -428,8 +440,8 @@ allocNurseries( void )
         allocNursery(&nurseries[i], NULL, RtsFlags.GcFlags.minAllocAreaSize);
         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
-        nurseries[i].to_blocks   = NULL;
-        nurseries[i].n_to_blocks = 0;
+        nurseries[i].old_blocks   = NULL;
+        nurseries[i].n_old_blocks = 0;
         /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
     }
     assignNurseriesToCapabilities();
@@ -510,7 +522,7 @@ resizeNursery ( step *stp, nat blocks )
 // Resize each of the nurseries to the specified size.
 //
 void
-resizeNurseries (nat blocks)
+resizeNurseriesFixed (nat blocks)
 {
     nat i;
     for (i = 0; i < n_nurseries; i++) {
@@ -518,6 +530,17 @@ resizeNurseries (nat blocks)
     }
 }
 
+//
+// Resize the nurseries to the total specified size.
+//
+void
+resizeNurseries (nat blocks)
+{
+    // If there are multiple nurseries, then we just divide the number
+    // of available blocks between them.
+    resizeNurseriesFixed(blocks / n_nurseries);
+}
+
 /* -----------------------------------------------------------------------------
    The allocate() interface
@@ -529,49 +552,49 @@ resizeNurseries (nat blocks)
 StgPtr
 allocate( nat n )
 {
-  bdescr *bd;
-  StgPtr p;
+    bdescr *bd;
+    StgPtr p;
 
-  ACQUIRE_SM_LOCK;
+    ACQUIRE_SM_LOCK;
 
-  TICK_ALLOC_HEAP_NOCTR(n);
-  CCS_ALLOC(CCCS,n);
-
-  /* big allocation (>LARGE_OBJECT_THRESHOLD) */
-  /* ToDo: allocate directly into generation 1 */
-  if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
-    nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
-    bd = allocGroup(req_blocks);
-    dbl_link_onto(bd, &g0s0->large_objects);
-    g0s0->n_large_blocks += req_blocks;
-    bd->gen_no  = 0;
-    bd->step = g0s0;
-    bd->flags = BF_LARGE;
-    bd->free = bd->start + n;
-    alloc_blocks += req_blocks;
-    RELEASE_SM_LOCK;
-    return bd->start;
+    TICK_ALLOC_HEAP_NOCTR(n);
+    CCS_ALLOC(CCCS,n);
 
-  /* small allocation (<LARGE_OBJECT_THRESHOLD) */
-  } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
-    if (small_alloc_list) {
-      small_alloc_list->free = alloc_Hp;
+    /* big allocation (>LARGE_OBJECT_THRESHOLD) */
+    /* ToDo: allocate directly into generation 1 */
+    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+        nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+        bd = allocGroup(req_blocks);
+        dbl_link_onto(bd, &g0s0->large_objects);
+        g0s0->n_large_blocks += req_blocks;
+        bd->gen_no  = 0;
+        bd->step = g0s0;
+        bd->flags = BF_LARGE;
+        bd->free = bd->start + n;
+        alloc_blocks += req_blocks;
+        RELEASE_SM_LOCK;
+        return bd->start;
+
+    /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+    } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
+        if (small_alloc_list) {
+            small_alloc_list->free = alloc_Hp;
+        }
+        bd = allocBlock();
+        bd->link = small_alloc_list;
+        small_alloc_list = bd;
+        bd->gen_no = 0;
+        bd->step = g0s0;
+        bd->flags = 0;
+        alloc_Hp = bd->start;
+        alloc_HpLim = bd->start + BLOCK_SIZE_W;
+        alloc_blocks++;
     }
-    bd = allocBlock();
-    bd->link = small_alloc_list;
-    small_alloc_list = bd;
-    bd->gen_no = 0;
-    bd->step = g0s0;
-    bd->flags = 0;
-    alloc_Hp = bd->start;
-    alloc_HpLim = bd->start + BLOCK_SIZE_W;
-    alloc_blocks++;
-  }
-
-  p = alloc_Hp;
-  alloc_Hp += n;
-  RELEASE_SM_LOCK;
-  return p;
+
+    p = alloc_Hp;
+    alloc_Hp += n;
+    RELEASE_SM_LOCK;
+    return p;
 }
 
 lnat
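Aside (annotation, not part of the patch): allocate() splits requests at LARGE_OBJECT_THRESHOLD; a large request bypasses the bump-pointer path and is rounded up to a whole block group. A standalone sketch of that arithmetic, assuming 4096-byte blocks and 4-byte words (the real constants live in the RTS headers; the names below are invented for the sketch):

    /* Sketch only: the large-object size check and the BLOCK_ROUND_UP math. */
    #include <stdio.h>

    #define BLOCK_SIZE_SK   4096u                       /* assumed block size */
    #define WORD_SIZE_SK    4u                          /* assumed word size  */
    #define LARGE_THRESH_SK (BLOCK_SIZE_SK * 8u / 10u)  /* bytes; assumption  */

    int main(void)
    {
        unsigned n = 2000;   /* request, in words, as in allocate(n) */
        if (n >= LARGE_THRESH_SK / WORD_SIZE_SK) {
            /* round the byte size up to whole blocks (BLOCK_ROUND_UP) */
            unsigned req_blocks =
                (n * WORD_SIZE_SK + BLOCK_SIZE_SK - 1) / BLOCK_SIZE_SK;
            printf("%u words -> %u block(s)\n", n, req_blocks);  /* prints 2 */
        }
        return 0;
    }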
@@ -598,6 +621,84 @@ tidyAllocateLists (void)
     }
 }
 
+/* -----------------------------------------------------------------------------
+   allocateLocal()
+
+   This allocates memory in the current thread - it is intended for
+   use primarily from STG-land where we have a Capability.  It is
+   better than allocate() because it doesn't require taking the
+   sm_mutex lock in the common case.
+
+   Memory is allocated directly from the nursery if possible (but not
+   from the current nursery block, so as not to interfere with
+   Hp/HpLim).
+   -------------------------------------------------------------------------- */
+
+StgPtr
+allocateLocal (Capability *cap, nat n)
+{
+    bdescr *bd;
+    StgPtr p;
+
+    TICK_ALLOC_HEAP_NOCTR(n);
+    CCS_ALLOC(CCCS,n);
+
+    /* big allocation (>LARGE_OBJECT_THRESHOLD) */
+    /* ToDo: allocate directly into generation 1 */
+    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+        nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+        ACQUIRE_SM_LOCK;
+        bd = allocGroup(req_blocks);
+        dbl_link_onto(bd, &g0s0->large_objects);
+        g0s0->n_large_blocks += req_blocks;
+        bd->gen_no  = 0;
+        bd->step = g0s0;
+        bd->flags = BF_LARGE;
+        bd->free = bd->start + n;
+        alloc_blocks += req_blocks;
+        RELEASE_SM_LOCK;
+        return bd->start;
+
+    /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+    } else {
+
+        bd = cap->r.rCurrentAlloc;
+        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+
+            // The CurrentAlloc block is full, we need to find another
+            // one.  First, we try taking the next block from the
+            // nursery:
+            bd = cap->r.rCurrentNursery->link;
+
+            if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+                // The nursery is empty, or the next block is already
+                // full: allocate a fresh block (we can't fail here).
+                ACQUIRE_SM_LOCK;
+                bd = allocBlock();
+                cap->r.rNursery->n_blocks++;
+                RELEASE_SM_LOCK;
+                bd->gen_no = 0;
+                bd->step = cap->r.rNursery;
+                bd->flags = 0;
+            } else {
+                // we have a block in the nursery: take it and put
+                // it at the *front* of the nursery list, and use it
+                // to allocate() from.
+                cap->r.rCurrentNursery->link = bd->link;
+                if (bd->link != NULL) {
+                    bd->link->u.back = cap->r.rCurrentNursery;
+                }
+            }
+            dbl_link_onto(bd, &cap->r.rNursery->blocks);
+            cap->r.rCurrentAlloc = bd;
+            IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
+        }
+    }
+    p = bd->free;
+    bd->free += n;
+    return p;
+}
+
 /* ---------------------------------------------------------------------------
    Allocate a fixed/pinned object.
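Aside (annotation, not part of the patch): the point of allocateLocal() is that ordinary allocations are served from the Capability's own nursery and never touch sm_mutex; only the block-refill and large-object paths lock. A hypothetical RTS-style caller in the same spirit as the stgAllocForGMP change further down -- the helper name is invented; the macros and types are the usual RTS ones:

    /* Sketch only: constructing a MUT_VAR via allocateLocal().  A fresh
     * object is born in the nursery (generation 0), so it needs no
     * mutable-list recording yet.  Hypothetical helper, not patch code. */
    StgPtr exampleAllocMutVar (Capability *cap, StgClosure *init)
    {
        StgMutVar *mv;
        mv = (StgMutVar *)allocateLocal(cap, sizeofW(StgMutVar));
        SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCCS);  /* see barrier below */
        mv->var = init;
        return (StgPtr)mv;
    }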
@@ -664,6 +765,25 @@ allocatePinned( nat n )
 }
 
 /* -----------------------------------------------------------------------------
+   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
+   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
+   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
+   and is put on the mutable list.
+   -------------------------------------------------------------------------- */
+
+void
+dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
+{
+    Capability *cap = regTableToCapability(reg);
+    bdescr *bd;
+    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
+        p->header.info = &stg_MUT_VAR_DIRTY_info;
+        bd = Bdescr((StgPtr)p);
+        if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
+    }
+}
+
+/* -----------------------------------------------------------------------------
    Allocation functions for GMP.
 
    These all use the allocate() interface - we can't have any garbage
@@ -685,7 +805,11 @@ stgAllocForGMP (size_t size_in_bytes)
   total_size_in_words  = sizeofW(StgArrWords) + data_size_in_words;
 
   /* allocate and fill it in. */
-  arr = (StgArrWords *)allocate(total_size_in_words);
+#if defined(THREADED_RTS)
+  arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
+#else
+  arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
+#endif
   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 
   /* and return a ptr to the goods inside the array */
@@ -732,17 +856,16 @@ calcAllocated( void )
 {
   nat allocated;
   bdescr *bd;
-  nat i;
 
   allocated = allocated_bytes();
-  for (i = 0; i < n_nurseries; i++) {
-      allocated += nurseries[i].n_blocks * BLOCK_SIZE_W;
-  }
+  allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 
-#ifdef SMP
+  {
+#ifdef THREADED_RTS
+  nat i;
   for (i = 0; i < n_nurseries; i++) {
       Capability *cap;
-      for ( bd = capabilities[i].r.rCurrentNursery;
+      for ( bd = capabilities[i].r.rCurrentNursery->link;
             bd != NULL; bd = bd->link ) {
           allocated -= BLOCK_SIZE_W;
       }
@@ -764,6 +887,7 @@ calcAllocated( void )
           - current_nursery->free;
   }
 #endif
+  }
 
   total_allocated += allocated;
   return allocated;
@@ -780,7 +904,7 @@ calcLive(void)
   step *stp;
 
   if (RtsFlags.GcFlags.generations == 1) {
-      live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
+      live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
           ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
       return live;
   }
@@ -799,6 +923,9 @@ calcLive(void)
           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
               / sizeof(W_);
       }
+      if (stp->scavd_hp != NULL) {
+          live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
+      }
     }
   }
   return live;
@@ -852,6 +979,7 @@ stepBlocks (step *stp)
   bdescr *bd;
 
   total_blocks = stp->n_blocks;
+  total_blocks += stp->n_old_blocks;
   for (bd = stp->large_objects; bd; bd = bd->link) {
       total_blocks += bd->blocks;
       /* hack for megablock groups: they have an extra block or two in
@@ -877,6 +1005,11 @@ memInventory(void)
   /* count the blocks we current have */
 
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+      for (i = 0; i < n_capabilities; i++) {
+          for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
+              total_blocks += bd->blocks;
+          }
+      }
       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
           total_blocks += bd->blocks;
       }
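Aside (annotation, not part of the patch): dirty_MUT_VAR() above is a lazy write barrier -- the first write to a clean MUT_VAR living in an old generation records it for the next GC, and subsequent writes cost nothing until the GC marks it clean again. The same state machine in standalone C, with invented names:

    /* Sketch only: the CLEAN->DIRTY transition that dirty_MUT_VAR implements. */
    enum mv_state_sk { MV_CLEAN_SK, MV_DIRTY_SK };

    struct mutvar_sk {
        enum mv_state_sk state;
        int gen_no;              /* generation of the block holding it */
        void *var;
    };

    /* called on every write, as the writeMutVar# primop calls dirty_MUT_VAR() */
    void write_mutvar_sk (struct mutvar_sk *mv, void *val)
    {
        if (mv->state == MV_CLEAN_SK) {
            mv->state = MV_DIRTY_SK;
            if (mv->gen_no > 0) {
                /* record mv on the capability's mutable list here */
            }
        }
        mv->var = val;
    }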
@@ -890,11 +1023,10 @@ memInventory(void)
   for (i = 0; i < n_nurseries; i++) {
       total_blocks += stepBlocks(&nurseries[i]);
   }
-
-  if (RtsFlags.GcFlags.generations == 1) {
-      /* two-space collector has a to-space too :-) */
-      total_blocks += g0s0->n_to_blocks;
-  }
+#ifdef THREADED_RTS
+  // We put pinned object blocks in g0s0, so better count blocks there too.
+  total_blocks += stepBlocks(g0s0);
+#endif
 
   /* any blocks held by allocate() */
   for (bd = small_alloc_list; bd; bd = bd->link) {
@@ -941,7 +1073,7 @@ checkSanity( void )
     nat g, s;
 
     if (RtsFlags.GcFlags.generations == 1) {
-        checkHeap(g0s0->to_blocks);
+        checkHeap(g0s0->blocks);
         checkChain(g0s0->large_objects);
     } else {
 
@@ -961,16 +1093,32 @@ checkSanity( void )
        }
 
        for (s = 0; s < n_nurseries; s++) {
-           ASSERT(countBlocks(generations[g].steps[s].blocks)
-                  == generations[g].steps[s].n_blocks);
-           ASSERT(countBlocks(generations[g].steps[s].large_objects)
-                  == generations[g].steps[s].n_large_blocks);
+           ASSERT(countBlocks(nurseries[s].blocks)
+                  == nurseries[s].n_blocks);
+           ASSERT(countBlocks(nurseries[s].large_objects)
+                  == nurseries[s].n_large_blocks);
        }
 
        checkFreeListSanity();
     }
 }
 
+/* Nursery sanity check */
+void
+checkNurserySanity( step *stp )
+{
+    bdescr *bd, *prev;
+    nat blocks = 0;
+
+    prev = NULL;
+    for (bd = stp->blocks; bd != NULL; bd = bd->link) {
+        ASSERT(bd->u.back == prev);
+        prev = bd;
+        blocks += bd->blocks;
+    }
+    ASSERT(blocks == stp->n_blocks);
+}
+
 // handy function for use in gdb, because Bdescr() is inlined.
 extern bdescr *_bdescr( StgPtr p );
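Aside (annotation, not part of the patch): checkNurserySanity() relies on the nursery being a doubly-linked chain of block descriptors whose u.back pointers mirror the link pointers -- the invariant allocateLocal() maintains when it pushes a block with dbl_link_onto(). The same shape in standalone C, with simplified stand-ins for bdescr and dbl_link_onto():

    /* Sketch only: the doubly-linked block-list invariant. */
    #include <assert.h>
    #include <stddef.h>

    struct blk_sk {
        struct blk_sk *link;   /* forward pointer, like bd->link   */
        struct blk_sk *back;   /* back pointer,    like bd->u.back */
    };

    /* push b on the front of *list, like dbl_link_onto() */
    static void dbl_link_onto_sk (struct blk_sk *b, struct blk_sk **list)
    {
        b->link = *list;
        b->back = NULL;
        if (*list != NULL) (*list)->back = b;
        *list = b;
    }

    /* the walk checkNurserySanity() performs */
    static void check_chain_sk (struct blk_sk *list)
    {
        struct blk_sk *prev = NULL;
        for (struct blk_sk *b = list; b != NULL; b = b->link) {
            assert(b->back == prev);
            prev = b;
        }
    }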