From: Simon Marlow Date: Fri, 13 Mar 2009 10:45:16 +0000 (+0000) Subject: Instead of a separate context-switch flag, set HpLim to zero X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=304e7fb703e7afddc1ef9be6aab6505e36b63b06;p=ghc-hetmet.git Instead of a separate context-switch flag, set HpLim to zero This reduces the latency between a context-switch being triggered and the thread returning to the scheduler, which in turn should reduce the cost of the GC barrier when there are many cores. We still retain the old context_switch flag which is checked at the end of each block of allocation. The idea is that setting HpLim may fail if the target thread is modifying HpLim at the same time; the context_switch flag is a fallback. It also allows us to "context switch soon" without forcing an immediate switch, which can be costly. --- diff --git a/includes/MachRegs.h b/includes/MachRegs.h index adc132b..f51f782 100644 --- a/includes/MachRegs.h +++ b/includes/MachRegs.h @@ -95,7 +95,6 @@ s0 $9 Sp s2 $11 SpLim s3 $12 Hp - s4 $13 HpLim t8 $22 NCG_reserved t12 $27 NCG_reserved -------------------------------------------------------------------------- */ @@ -134,7 +133,6 @@ # define REG_SpLim 11 # define REG_Hp 12 -# define REG_HpLim 13 # define NCG_Reserved_I1 22 # define NCG_Reserved_I2 27 @@ -189,7 +187,6 @@ #define REG_SpLim r6 #define REG_Hp r7 -#define REG_HpLim r8 #define NCG_Reserved_I1 r28 #define NCG_Reserved_I2 r29 @@ -215,7 +212,7 @@ esi R1 edi Hp - Leaving SpLim, and HpLim out of the picture. + Leaving SpLim out of the picture. 
-------------------------------------------------------------------------- */ @@ -284,12 +281,12 @@ #define REG_Sp rbp #define REG_Hp r12 #define REG_R1 rbx -#define REG_R2 rsi -#define REG_R3 rdi -#define REG_R4 r8 -#define REG_R5 r9 -#define REG_SpLim r14 -#define REG_HpLim r15 +#define REG_R2 r14 +#define REG_R3 rsi +#define REG_R4 rdi +#define REG_R5 r8 +#define REG_R6 r9 +#define REG_SpLim r15 #define REG_F1 xmm1 #define REG_F2 xmm2 @@ -299,10 +296,10 @@ #define REG_D1 xmm5 #define REG_D2 xmm6 -#define CALLER_SAVES_R2 #define CALLER_SAVES_R3 #define CALLER_SAVES_R4 #define CALLER_SAVES_R5 +#define CALLER_SAVES_R6 #define CALLER_SAVES_F1 #define CALLER_SAVES_F2 @@ -312,7 +309,7 @@ #define CALLER_SAVES_D1 #define CALLER_SAVES_D2 -#define MAX_REAL_VANILLA_REG 5 +#define MAX_REAL_VANILLA_REG 6 #define MAX_REAL_FLOAT_REG 4 #define MAX_REAL_DOUBLE_REG 2 #define MAX_REAL_LONG_REG 0 @@ -361,7 +358,6 @@ #define REG_SpLim d3 #define REG_Hp d4 -#define REG_HpLim d5 #define REG_R1 a5 #define REG_R2 d6 @@ -425,7 +421,6 @@ #define REG_SpLim 21 #define REG_Hp 22 -#define REG_HpLim 23 #define REG_Base 30 @@ -500,7 +495,6 @@ #define REG_SpLim r24 #define REG_Hp r25 -#define REG_HpLim r26 #define REG_Base r27 @@ -543,7 +537,6 @@ #define REG_SpLim loc26 #define REG_Hp loc27 -#define REG_HpLim loc28 #endif /* ia64 */ @@ -597,7 +590,6 @@ %i1 Base %i2 SpLim %i3 Hp - %i4 HpLim %i5 R6 %i6 C frame ptr %i7 C ret addr @@ -666,7 +658,6 @@ #define REG_SpLim i2 #define REG_Hp i3 -#define REG_HpLim i4 #define REG_Base i1 diff --git a/includes/Regs.h b/includes/Regs.h index 45f9149..29c04b3 100644 --- a/includes/Regs.h +++ b/includes/Regs.h @@ -133,7 +133,6 @@ typedef struct StgRegTable_ { #define SAVE_SpLim (CurrentTSO->splim) #define SAVE_Hp (BaseReg->rHp) -#define SAVE_HpLim (BaseReg->rHpLim) #define SAVE_CurrentTSO (BaseReg->rCurrentTSO) #define SAVE_CurrentNursery (BaseReg->rCurrentNursery) @@ -349,7 +348,7 @@ GLOBAL_REG_DECL(P_,Hp,REG_Hp) #endif #if defined(REG_HpLim) && 
!defined(NO_GLOBAL_REG_DECLS) -GLOBAL_REG_DECL(P_,HpLim,REG_HpLim) +#error HpLim cannot be in a register #else #define HpLim (BaseReg->rHpLim) #endif @@ -570,14 +569,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) #define CALLER_RESTORE_Hp /* nothing */ #endif -#ifdef CALLER_SAVES_HpLim -#define CALLER_SAVE_HpLim SAVE_HpLim = HpLim; -#define CALLER_RESTORE_HpLim HpLim = SAVE_HpLim; -#else -#define CALLER_SAVE_HpLim /* nothing */ -#define CALLER_RESTORE_HpLim /* nothing */ -#endif - #ifdef CALLER_SAVES_Base #ifdef THREADED_RTS #error "Can't have caller-saved BaseReg with THREADED_RTS" @@ -644,7 +635,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) CALLER_SAVE_Sp \ CALLER_SAVE_SpLim \ CALLER_SAVE_Hp \ - CALLER_SAVE_HpLim \ CALLER_SAVE_CurrentTSO \ CALLER_SAVE_CurrentNursery \ CALLER_SAVE_Base @@ -673,7 +663,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) CALLER_RESTORE_Sp \ CALLER_RESTORE_SpLim \ CALLER_RESTORE_Hp \ - CALLER_RESTORE_HpLim \ CALLER_RESTORE_CurrentTSO \ CALLER_RESTORE_CurrentNursery diff --git a/rts/Capability.c b/rts/Capability.c index a81d710..bd6d56f 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -294,10 +294,10 @@ initCapabilities( void ) void setContextSwitches(void) { - nat i; - for (i=0; i < n_capabilities; i++) { - capabilities[i].context_switch = 1; - } + nat i; + for (i=0; i < n_capabilities; i++) { + contextSwitchCapability(&capabilities[i]); + } } /* ---------------------------------------------------------------------------- @@ -482,14 +482,17 @@ waitForReturnCapability (Capability **pCap, Task *task) if (!cap->running_task) { nat i; // otherwise, search for a free capability + cap = NULL; for (i = 0; i < n_capabilities; i++) { - cap = &capabilities[i]; - if (!cap->running_task) { + if (!capabilities[i].running_task) { + cap = &capabilities[i]; break; } } - // Can't find a free one, use last_free_capability. - cap = last_free_capability; + if (cap == NULL) { + // Can't find a free one, use last_free_capability. 
+ cap = last_free_capability; + } } // record the Capability as the one this Task is now assocated with. diff --git a/rts/Capability.h b/rts/Capability.h index 77132e3..0f61fad 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -276,6 +276,7 @@ extern void grabCapability (Capability **pCap); // cause all capabilities to context switch as soon as possible. void setContextSwitches(void); +INLINE_HEADER void contextSwitchCapability(Capability *cap); // Free all capabilities void freeCapabilities (void); @@ -322,4 +323,16 @@ discardSparksCap (Capability *cap) { return discardSparks(cap->sparks); } #endif +INLINE_HEADER void +contextSwitchCapability (Capability *cap) +{ + // setting HpLim to NULL ensures that the next heap check will + // fail, and the thread will return to the scheduler. + cap->r.rHpLim = NULL; + // But just in case it didn't work (the target thread might be + // modifying HpLim at the same time), we set the end-of-block + // context-switch flag too: + cap->context_switch = 1; +} + #endif /* CAPABILITY_H */ diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index 94cec38..10baca2 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -23,8 +23,11 @@ import LeaveCriticalSection; * * On discovering that a stack or heap check has failed, we do the following: * - * - If the context_switch flag is set, indicating that there are more - * threads waiting to run, we yield to the scheduler + * - If HpLim==0, indicating that we should context-switch, we yield + * to the scheduler (return ThreadYielding). + * + * - If the context_switch flag is set (the backup plan if setting HpLim + * to 0 didn't trigger a context switch), we yield to the scheduler * (return ThreadYielding). * * - If Hp > HpLim, we've had a heap check failure. 
This means we've @@ -60,6 +63,10 @@ import LeaveCriticalSection; #define GC_GENERIC \ DEBUG_ONLY(foreign "C" heapCheckFail()); \ if (Hp > HpLim) { \ + if (HpLim == 0) { \ + R1 = ThreadYielding; \ + goto sched; \ + } \ Hp = Hp - HpAlloc/*in bytes*/; \ if (HpAlloc <= BLOCK_SIZE \ && bdescr_link(CurrentNursery) != NULL) { \ diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 1b2d730..1a6e927 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -196,6 +196,9 @@ interpretBCO (Capability* cap) LOAD_STACK_POINTERS; + cap->r.rHpLim = (P_)1; // HpLim is the context-switch flag; when it + // goes to zero we must return to the scheduler. + // ------------------------------------------------------------------------ // Case 1: // @@ -1281,7 +1284,7 @@ run_BCO: // context switching: sometimes the scheduler can invoke // the interpreter with context_switch == 1, particularly // if the -C0 flag has been given on the cmd line. - if (cap->context_switch) { + if (cap->r.rHpLim == NULL) { Sp--; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding); } diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index adb2a64..121102c 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1073,7 +1073,8 @@ forkzh_fast foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; - // switch at the earliest opportunity + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); @@ -1102,7 +1103,8 @@ forkOnzh_fast foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; - // switch at the earliest opportunity + // context switch soon, but not immediately: we don't want every + // forkIO to force a context-switch. 
Capability_context_switch(MyCapability()) = 1 :: CInt; RET_P(threadid); diff --git a/rts/Schedule.c b/rts/Schedule.c index 47636a3..040d16f 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -1268,7 +1268,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t ) "--<< thread %ld (%s) stopped: HeapOverflow", (long)t->id, whatNext_strs[t->what_next]); - if (cap->context_switch) { + if (cap->r.rHpLim == NULL || cap->context_switch) { // Sometimes we miss a context switch, e.g. when calling // primitives in a tight loop, MAYBE_GC() doesn't check the // context switch flag, and we end up waiting for a GC. diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm index 16e5c62..c3c0bc3 100644 --- a/rts/StgStartup.cmm +++ b/rts/StgStartup.cmm @@ -28,9 +28,7 @@ ASSERT(Hp != 0); \ ASSERT(Sp != 0); \ ASSERT(SpLim != 0); \ - ASSERT(HpLim != 0); \ - ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp); \ - ASSERT(HpLim >= Hp); + ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp); /* ----------------------------------------------------------------------------- Returning from the STG world. diff --git a/rts/Threads.c b/rts/Threads.c index 2c7b2be..936b90e 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -505,8 +505,10 @@ unblockOne_ (Capability *cap, StgTSO *tso, } tso->cap = cap; appendToRunQueue(cap,tso); - // we're holding a newly woken thread, make sure we context switch - // quickly so we can migrate it if necessary. + + // context-switch soonish so we can migrate the new thread if + // necessary. NB. not contextSwitchCapability(cap), which would + // force a context switch immediately. cap->context_switch = 1; } else { // we'll try to wake it up on the Capability it was last on. @@ -514,6 +516,10 @@ unblockOne_ (Capability *cap, StgTSO *tso, } #else appendToRunQueue(cap,tso); + + // context-switch soonish so we can migrate the new thread if + // necessary. NB. not contextSwitchCapability(cap), which would + // force a context switch immediately. 
cap->context_switch = 1; #endif diff --git a/rts/posix/Signals.c b/rts/posix/Signals.c index 8268e6f..6d5ef43 100644 --- a/rts/posix/Signals.c +++ b/rts/posix/Signals.c @@ -214,7 +214,7 @@ generic_handler(int sig USED_IF_THREADS, stg_exit(EXIT_FAILURE); } - MainCapability.context_switch = 1; + contextSwitchCapability(&MainCapability); #endif /* THREADED_RTS */ }