Instead of a separate context-switch flag, set HpLim to zero
author Simon Marlow <marlowsd@gmail.com>
Fri, 13 Mar 2009 10:45:16 +0000 (10:45 +0000)
committer Simon Marlow <marlowsd@gmail.com>
Fri, 13 Mar 2009 10:45:16 +0000 (10:45 +0000)
This reduces the latency between a context-switch being triggered and
the thread returning to the scheduler, which in turn should reduce the
cost of the GC barrier when there are many cores.

We still retain the old context_switch flag which is checked at the
end of each block of allocation.  The idea is that setting HpLim may
fail if the target thread is modifying HpLim at the same time; the
context_switch flag is a fallback.  It also allows us to "context
switch soon" without forcing an immediate switch, which can be costly.

includes/MachRegs.h
includes/Regs.h
rts/Capability.c
rts/Capability.h
rts/HeapStackCheck.cmm
rts/Interpreter.c
rts/PrimOps.cmm
rts/Schedule.c
rts/StgStartup.cmm
rts/Threads.c
rts/posix/Signals.c

index adc132b..f51f782 100644 (file)
@@ -95,7 +95,6 @@
    s0  $9      Sp
    s2  $11     SpLim
    s3  $12     Hp   
-   s4  $13     HpLim
    t8  $22     NCG_reserved
    t12 $27     NCG_reserved
    -------------------------------------------------------------------------- */
 #  define REG_SpLim     11
 
 #  define REG_Hp       12
-#  define REG_HpLim    13
   
 #  define NCG_Reserved_I1 22
 #  define NCG_Reserved_I2 27
 #define REG_SpLim      r6
 
 #define REG_Hp         r7
-#define REG_HpLim      r8
 
 #define NCG_Reserved_I1 r28
 #define NCG_Reserved_I2        r29
    esi     R1
    edi     Hp
 
-   Leaving SpLim, and HpLim out of the picture.
+   Leaving SpLim out of the picture.
    -------------------------------------------------------------------------- */
 
 
 #define REG_Sp    rbp
 #define REG_Hp    r12
 #define REG_R1    rbx
-#define REG_R2    rsi
-#define REG_R3    rdi
-#define REG_R4    r8
-#define REG_R5    r9
-#define REG_SpLim r14
-#define REG_HpLim r15
+#define REG_R2    r14
+#define REG_R3    rsi
+#define REG_R4    rdi
+#define REG_R5    r8
+#define REG_R6    r9
+#define REG_SpLim r15
 
 #define REG_F1    xmm1
 #define REG_F2    xmm2
 #define REG_D1    xmm5
 #define REG_D2    xmm6
 
-#define CALLER_SAVES_R2
 #define CALLER_SAVES_R3
 #define CALLER_SAVES_R4
 #define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
 
 #define CALLER_SAVES_F1
 #define CALLER_SAVES_F2
 #define CALLER_SAVES_D1
 #define CALLER_SAVES_D2
 
-#define MAX_REAL_VANILLA_REG 5
+#define MAX_REAL_VANILLA_REG 6
 #define MAX_REAL_FLOAT_REG   4
 #define MAX_REAL_DOUBLE_REG  2
 #define MAX_REAL_LONG_REG    0
 #define REG_SpLim      d3
 
 #define REG_Hp         d4
-#define REG_HpLim       d5
 
 #define REG_R1         a5
 #define REG_R2         d6
 #define REG_SpLim      21
 
 #define REG_Hp         22
-#define REG_HpLim      23
 
 #define REG_Base       30
 
 #define REG_SpLim      r24
 
 #define REG_Hp         r25
-#define REG_HpLim      r26
 
 #define REG_Base        r27
 
 #define REG_SpLim      loc26
 
 #define REG_Hp         loc27
-#define REG_HpLim      loc28
 
 #endif /* ia64 */
 
      %i1        Base
      %i2        SpLim
      %i3        Hp
-     %i4        HpLim
      %i5        R6
      %i6                    C frame ptr
      %i7                    C ret addr
 #define REG_SpLim      i2
 
 #define REG_Hp         i3
-#define REG_HpLim      i4
 
 #define REG_Base       i1
 
index 45f9149..29c04b3 100644 (file)
@@ -133,7 +133,6 @@ typedef struct StgRegTable_ {
 #define SAVE_SpLim         (CurrentTSO->splim)
 
 #define SAVE_Hp                    (BaseReg->rHp)
-#define SAVE_HpLim         (BaseReg->rHpLim)
 
 #define SAVE_CurrentTSO     (BaseReg->rCurrentTSO)
 #define SAVE_CurrentNursery (BaseReg->rCurrentNursery)
@@ -349,7 +348,7 @@ GLOBAL_REG_DECL(P_,Hp,REG_Hp)
 #endif
 
 #if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS)
-GLOBAL_REG_DECL(P_,HpLim,REG_HpLim)
+#error HpLim cannot be in a register
 #else
 #define HpLim (BaseReg->rHpLim)
 #endif
@@ -570,14 +569,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
 #define CALLER_RESTORE_Hp      /* nothing */
 #endif
 
-#ifdef CALLER_SAVES_HpLim
-#define CALLER_SAVE_HpLim      SAVE_HpLim = HpLim;
-#define CALLER_RESTORE_HpLim   HpLim = SAVE_HpLim;
-#else
-#define CALLER_SAVE_HpLim      /* nothing */
-#define CALLER_RESTORE_HpLim           /* nothing */
-#endif
-
 #ifdef CALLER_SAVES_Base
 #ifdef THREADED_RTS
 #error "Can't have caller-saved BaseReg with THREADED_RTS"
@@ -644,7 +635,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
   CALLER_SAVE_Sp                               \
   CALLER_SAVE_SpLim                            \
   CALLER_SAVE_Hp                               \
-  CALLER_SAVE_HpLim                            \
   CALLER_SAVE_CurrentTSO                       \
   CALLER_SAVE_CurrentNursery                   \
   CALLER_SAVE_Base
@@ -673,7 +663,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
   CALLER_RESTORE_Sp                            \
   CALLER_RESTORE_SpLim                         \
   CALLER_RESTORE_Hp                            \
-  CALLER_RESTORE_HpLim                         \
   CALLER_RESTORE_CurrentTSO                    \
   CALLER_RESTORE_CurrentNursery
 
index a81d710..bd6d56f 100644 (file)
@@ -294,10 +294,10 @@ initCapabilities( void )
 
 void setContextSwitches(void)
 {
-  nat i;
-  for (i=0; i < n_capabilities; i++) {
-    capabilities[i].context_switch = 1;
-  }
+    nat i;
+    for (i=0; i < n_capabilities; i++) {
+        contextSwitchCapability(&capabilities[i]);
+    }
 }
 
 /* ----------------------------------------------------------------------------
@@ -482,14 +482,17 @@ waitForReturnCapability (Capability **pCap, Task *task)
        if (!cap->running_task) {
            nat i;
            // otherwise, search for a free capability
+            cap = NULL;
            for (i = 0; i < n_capabilities; i++) {
-               cap = &capabilities[i];
-               if (!cap->running_task) {
+               if (!capabilities[i].running_task) {
+                    cap = &capabilities[i];
                    break;
                }
            }
-           // Can't find a free one, use last_free_capability.
-           cap = last_free_capability;
+            if (cap == NULL) {
+                // Can't find a free one, use last_free_capability.
+                cap = last_free_capability;
+            }
        }
 
        // record the Capability as the one this Task is now assocated with.
index 77132e3..0f61fad 100644 (file)
@@ -276,6 +276,7 @@ extern void grabCapability (Capability **pCap);
 
 // cause all capabilities to context switch as soon as possible.
 void setContextSwitches(void);
+INLINE_HEADER void contextSwitchCapability(Capability *cap);
 
 // Free all capabilities
 void freeCapabilities (void);
@@ -322,4 +323,16 @@ discardSparksCap (Capability *cap)
 { return discardSparks(cap->sparks); }
 #endif
 
+INLINE_HEADER void
+contextSwitchCapability (Capability *cap)
+{
+    // setting HpLim to NULL ensures that the next heap check will
+    // fail, and the thread will return to the scheduler.
+    cap->r.rHpLim = NULL;
+    // But just in case it didn't work (the target thread might be
+    // modifying HpLim at the same time), we set the end-of-block
+    // context-switch flag too:
+    cap->context_switch = 1;
+}
+
 #endif /* CAPABILITY_H */
index 94cec38..10baca2 100644 (file)
@@ -23,8 +23,11 @@ import LeaveCriticalSection;
  *
  * On discovering that a stack or heap check has failed, we do the following:
  *
- *    - If the context_switch flag is set, indicating that there are more
- *      threads waiting to run, we yield to the scheduler 
+ *    - If HpLim==0, indicating that we should context-switch, we yield
+ *      to the scheduler (return ThreadYielding).
+ *
+ *    - If the context_switch flag is set (the backup plan if setting HpLim
+ *      to 0 didn't trigger a context switch), we yield to the scheduler
  *     (return ThreadYielding).
  *
  *    - If Hp > HpLim, we've had a heap check failure.  This means we've
@@ -60,6 +63,10 @@ import LeaveCriticalSection;
 #define GC_GENERIC                                             \
     DEBUG_ONLY(foreign "C" heapCheckFail());                   \
     if (Hp > HpLim) {                                          \
+        if (HpLim == 0) { \
+                R1 = ThreadYielding;                           \
+                goto sched;                                    \
+        }                                              \
         Hp = Hp - HpAlloc/*in bytes*/;                         \
         if (HpAlloc <= BLOCK_SIZE                              \
             && bdescr_link(CurrentNursery) != NULL) {          \
index 1b2d730..1a6e927 100644 (file)
@@ -196,6 +196,9 @@ interpretBCO (Capability* cap)
 
     LOAD_STACK_POINTERS;
 
+    cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it
+                           // goes to zero we must return to the scheduler.
+
     // ------------------------------------------------------------------------
     // Case 1:
     // 
@@ -1281,7 +1284,7 @@ run_BCO:
            // context switching: sometimes the scheduler can invoke
            // the interpreter with context_switch == 1, particularly
            // if the -C0 flag has been given on the cmd line.
-           if (cap->context_switch) {
+           if (cap->r.rHpLim == NULL) {
                Sp--; Sp[0] = (W_)&stg_enter_info;
                RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding);
            }
index adb2a64..121102c 100644 (file)
@@ -1073,7 +1073,8 @@ forkzh_fast
 
   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 
-  // switch at the earliest opportunity
+  // context switch soon, but not immediately: we don't want every
+  // forkIO to force a context-switch.
   Capability_context_switch(MyCapability()) = 1 :: CInt;
   
   RET_P(threadid);
@@ -1102,7 +1103,8 @@ forkOnzh_fast
 
   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 
-  // switch at the earliest opportunity
+  // context switch soon, but not immediately: we don't want every
+  // forkIO to force a context-switch.
   Capability_context_switch(MyCapability()) = 1 :: CInt;
   
   RET_P(threadid);
index 47636a3..040d16f 100644 (file)
@@ -1268,7 +1268,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
               "--<< thread %ld (%s) stopped: HeapOverflow",
               (long)t->id, whatNext_strs[t->what_next]);
 
-    if (cap->context_switch) {
+    if (cap->r.rHpLim == NULL || cap->context_switch) {
         // Sometimes we miss a context switch, e.g. when calling
         // primitives in a tight loop, MAYBE_GC() doesn't check the
         // context switch flag, and we end up waiting for a GC.
index 16e5c62..c3c0bc3 100644 (file)
@@ -28,9 +28,7 @@
     ASSERT(Hp != 0);                   \
     ASSERT(Sp != 0);                   \
     ASSERT(SpLim != 0);                        \
-    ASSERT(HpLim != 0);                        \
-    ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp); \
-    ASSERT(HpLim >= Hp);
+    ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp);
 
 /* -----------------------------------------------------------------------------
    Returning from the STG world.
index 2c7b2be..936b90e 100644 (file)
@@ -505,8 +505,10 @@ unblockOne_ (Capability *cap, StgTSO *tso,
       }
       tso->cap = cap;
       appendToRunQueue(cap,tso);
-      // we're holding a newly woken thread, make sure we context switch
-      // quickly so we can migrate it if necessary.
+
+      // context-switch soonish so we can migrate the new thread if
+      // necessary.  NB. not contextSwitchCapability(cap), which would
+      // force a context switch immediately.
       cap->context_switch = 1;
   } else {
       // we'll try to wake it up on the Capability it was last on.
@@ -514,6 +516,10 @@ unblockOne_ (Capability *cap, StgTSO *tso,
   }
 #else
   appendToRunQueue(cap,tso);
+
+  // context-switch soonish so we can migrate the new thread if
+  // necessary.  NB. not contextSwitchCapability(cap), which would
+  // force a context switch immediately.
   cap->context_switch = 1;
 #endif
 
index 8268e6f..6d5ef43 100644 (file)
@@ -214,7 +214,7 @@ generic_handler(int sig USED_IF_THREADS,
        stg_exit(EXIT_FAILURE);
     }
     
-    MainCapability.context_switch = 1;
+    contextSwitchCapability(&MainCapability);
 
 #endif /* THREADED_RTS */
 }