From 047b7c2f56d60e551892915dc6f47371a46389d7 Mon Sep 17 00:00:00 2001
From: Simon Marlow <simonmar@microsoft.com>
Date: Wed, 21 Nov 2007 15:58:51 +0000
Subject: [PATCH] cache ws->todo_bd->free and the limit in the workspace.
 Avoids cache contention: ws->todo_bd->free may clash with
 any cache line, so we localise it.

---
 rts/sm/Evac.c    |   19 ++++++++++---------
 rts/sm/GC.c      |    2 ++
 rts/sm/GC.h      |    9 ++++++---
 rts/sm/GCUtils.c |   10 ++++++++--
 rts/sm/GCUtils.h |    2 +-
 rts/sm/Scav.c    |    7 ++++++-
 6 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index fc344f9..2fcc6c9 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -37,7 +37,6 @@ alloc_for_copy (nat size, step *stp)
 {
     StgPtr to;
     step_workspace *ws;
-    bdescr *bd;
 
     /* Find out where we're going, using the handy "to" pointer in 
      * the step of the source object.  If it turns out we need to
@@ -57,17 +56,18 @@ alloc_for_copy (nat size, step *stp)
     /* chain a new block onto the to-space for the destination step if
      * necessary.
      */
-    bd = ws->todo_bd;
-    to = bd->free;
-    if (to + size >= bd->start + BLOCK_SIZE_W) {
-	bd = gc_alloc_todo_block(ws);
-	to = bd->free;
+    
+    ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim);
+    to = ws->todo_free;
+    if (to + size >= ws->todo_lim) {
+	to = gc_alloc_todo_block(ws);
     }
-    bd->free = to + size;
+    ws->todo_free = to + size;
+    ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim);
 
     return to;
 }
-  
+
 /* -----------------------------------------------------------------------------
    The evacuate() code
    -------------------------------------------------------------------------- */
@@ -164,7 +164,8 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
     prev = NULL;
     while (p)
     {
-        ASSERT(p->header.info == &stg_BLACKHOLE_info);
+        ASSERT(p->header.info == &stg_BLACKHOLE_info
+              || p->header.info == &stg_WHITEHOLE_info);
         prev = (StgSelector*)((StgClosure *)p)->payload[0];
 
         // Update the THUNK_SELECTOR with an indirection to the
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index a07086e..8aca959 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1239,6 +1239,8 @@ init_uncollected_gen (nat g, nat threads)
 	    if (isPartiallyFull(stp->blocks))
 	    {
 		ws->todo_bd = stp->blocks;
+                ws->todo_free = ws->todo_bd->free;
+                ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
 		stp->blocks = stp->blocks->link;
 		stp->n_blocks -= 1;
 		ws->todo_bd->link = NULL;
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 5d7924e..cba2400 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -82,6 +82,9 @@ typedef struct step_workspace_ {
 
     // where objects to be scavenged go
     bdescr *     todo_bd;
+    StgPtr       todo_free;            // free ptr for todo_bd
+    StgPtr       todo_lim;             // lim for todo_bd
+
     bdescr *     buffer_todo_bd;     // buffer to reduce contention
                                      // on the step's todos list
 
@@ -132,9 +135,9 @@ typedef struct gc_thread_ {
                                    // optimise it into a per-thread
                                    // variable).
 
-    rtsBool failed_to_evac;        // failue to evacuate an object typically 
-                                   //  causes it to be recorded in the mutable 
-                                   //  object list
+    rtsBool failed_to_evac;        // failure to evacuate an object typically 
+                                   // causes it to be recorded in the mutable 
+                                   // object list
 
     rtsBool eager_promotion;       // forces promotion to the evac gen
                                    // instead of the to-space
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index a65131a..441bb3e 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -91,11 +91,15 @@ push_scan_block (bdescr *bd, step_workspace *ws)
 	     ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
 }
 
-bdescr *
+StgPtr
 gc_alloc_todo_block (step_workspace *ws)
 {
     bdescr *bd;
 
+    if (ws->todo_bd != NULL) {
+        ws->todo_bd->free = ws->todo_free;
+    }
+
     // If we already have a todo block, it must be full, so we push it
     // out: first to the buffer_todo_bd, then to the step.  BUT, don't
     // push out the block out if it is already the scan block.
@@ -124,8 +128,10 @@ gc_alloc_todo_block (step_workspace *ws)
     }
 	
     ws->todo_bd = bd;
+    ws->todo_free = bd->start;
+    ws->todo_lim  = bd->start + BLOCK_SIZE_W;
 
-    return bd;
+    return ws->todo_free;
 }
 
 /* -----------------------------------------------------------------------------
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index 2b22407..57c3b0c 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -21,7 +21,7 @@ bdescr *allocBlock_sync(void);
 
 void    push_scan_block      (bdescr *bd, step_workspace *ws);
 bdescr *grab_todo_block      (step_workspace *ws);
-bdescr *gc_alloc_todo_block  (step_workspace *ws);
+StgPtr  gc_alloc_todo_block  (step_workspace *ws);
 bdescr *gc_alloc_scavd_block (step_workspace *ws);
 
 // Returns true if a block is 3/4 full.  This predicate is used to try
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 3f68efb..7ee97c9 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -275,7 +275,7 @@ linear_scan:
 	info = get_itbl((StgClosure *)p);
 	
 	q = p;
-	switch (info->type) {
+        switch (((volatile StgWord *)info)[1] & 0xffff) {
 	    
         case MVAR_CLEAN:
         case MVAR_DIRTY:
@@ -1469,6 +1469,11 @@ scavenge_find_local_work (void)
 	    }
 	    ws = &gct->steps[g][s];
 
+            if (ws->todo_bd != NULL)
+            {
+                ws->todo_bd->free = ws->todo_free;
+            }
+
 	    // If we have a todo block and no scan block, start
 	    // scanning the todo block.
 	    if (ws->scan_bd == NULL && ws->todo_bd != NULL)
-- 
1.7.10.4