diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index 75f1418..f819576 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -12,34 +12,64 @@
 #include "Cmm.h"
 
+#ifdef __PIC__
+import pthread_mutex_unlock;
+#endif
+import EnterCriticalSection;
+import LeaveCriticalSection;
+
 /* Stack/Heap Check Failure
  * ------------------------
  *
- * On discovering that a stack or heap check has failed, we do the following:
+ * Both heap and stack check failures end up in the same place, so
+ * that we can share the code for the failure case when a proc needs
+ * both a stack check and a heap check (a common case).
  *
- *    - If the context_switch flag is set, indicating that there are more
- *      threads waiting to run, we yield to the scheduler
- *      (return ThreadYielding).
+ * So when we get here, we have to tell the difference between a stack
+ * check failure and a heap check failure.  The code for the checks
+ * looks like this:
+
+        if (Sp - 16 < SpLim) goto c1Tf;
+        Hp = Hp + 16;
+        if (Hp > HpLim) goto c1Th;
+        ...
+    c1Th:
+        HpAlloc = 16;
+        goto c1Tf;
+    c1Tf: jump stg_gc_enter_1 ();
+
+ * Note that Sp is not decremented by the check, whereas Hp is.  The
+ * reasons for this seem to be largely historical; I can't think of a
+ * good reason not to decrement Sp at the check too. (--SDM)
  *
- *    - If Hp > HpLim, we've had a heap check failure.  This means we've
- *      come to the end of the current heap block, so we try to chain
- *      another block on with ExtendNursery().
+ * Note that HpLim may be set to zero arbitrarily by the timer signal
+ * or another processor to trigger a context switch via heap check
+ * failure.
  *
- *           - If this succeeds, we carry on without returning to the
- *             scheduler.
+ * The job of these fragments (stg_gc_enter_1 and friends) is to
+ *   1. Leave no slop in the heap, so Hp must be retreated if it was
+ *      incremented by the check.  No-slop is a requirement for LDV
+ *      profiling, at least.
+ *   2. If a heap check failed, try to grab another heap block from
+ *      the nursery and continue.
+ *   3. Otherwise, return to the scheduler with StackOverflow,
+ *      HeapOverflow, or ThreadYielding as appropriate.
  *
- *           - If it fails, we return to the scheduler claiming HeapOverflow
- *             so that a garbage collection can be performed.
+ * We can tell whether Hp was incremented, because HpAlloc is
+ * non-zero: HpAlloc is required to be zero at all times unless a
+ * heap-check just failed, which is why the stack-check failure case
+ * does not set HpAlloc (see code fragment above).  So that covers (1).
+ * HpAlloc is zeroed in LOAD_THREAD_STATE().
  *
- *    - If Hp <= HpLim, it must have been a stack check that failed.  In
- *      which case, we return to the scheduler claiming StackOverflow, the
- *      scheduler will either increase the size of our stack, or raise
- *      an exception if the stack is already too big.
+ * If Hp > HpLim, then either (a) we have reached the end of the
+ * current heap block, or (b) HpLim == 0 and we should yield.  Hence
+ * check Hp > HpLim first, and then HpLim == 0 to decide whether to
+ * return ThreadYielding or try to grab another heap block from the
+ * nursery.
* - * The effect of checking for context switch only in the heap/stack check - * failure code is that we'll switch threads after the current thread has - * reached the end of its heap block. If a thread isn't allocating - * at all, it won't yield. Hopefully this won't be a problem in practice. + * If Hp <= HpLim, then this must be a StackOverflow. The scheduler + * will either increase the size of our stack, or raise an exception if + * the stack is already too big. */ #define PRE_RETURN(why,what_next) \ @@ -51,30 +81,35 @@ * ThreadRunGHC thread. */ -#define GC_GENERIC \ - DEBUG_ONLY(foreign "C" heapCheckFail()); \ - if (Hp > HpLim) { \ - Hp = Hp - HpAlloc/*in bytes*/; \ - if (HpAlloc <= BLOCK_SIZE \ - && bdescr_link(CurrentNursery) != NULL) { \ - CLOSE_NURSERY(); \ - CurrentNursery = bdescr_link(CurrentNursery); \ - OPEN_NURSERY(); \ - if (CInt[context_switch] != 0 :: CInt) { \ - R1 = ThreadYielding; \ - goto sched; \ - } else { \ - jump %ENTRY_CODE(Sp(0)); \ - } \ - } else { \ - R1 = HeapOverflow; \ - goto sched; \ - } \ - } else { \ - R1 = StackOverflow; \ - } \ - sched: \ - PRE_RETURN(R1,ThreadRunGHC); \ +#define GC_GENERIC \ + DEBUG_ONLY(foreign "C" heapCheckFail()); \ + if (Hp > HpLim) { \ + Hp = Hp - HpAlloc/*in bytes*/; \ + if (HpLim == 0) { \ + R1 = ThreadYielding; \ + goto sched; \ + } \ + if (HpAlloc <= BLOCK_SIZE \ + && bdescr_link(CurrentNursery) != NULL) { \ + HpAlloc = 0; \ + CLOSE_NURSERY(); \ + CurrentNursery = bdescr_link(CurrentNursery); \ + OPEN_NURSERY(); \ + if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \ + R1 = ThreadYielding; \ + goto sched; \ + } else { \ + jump %ENTRY_CODE(Sp(0)); \ + } \ + } else { \ + R1 = HeapOverflow; \ + goto sched; \ + } \ + } else { \ + R1 = StackOverflow; \ + } \ + sched: \ + PRE_RETURN(R1,ThreadRunGHC); \ jump stg_returnToSched; #define HP_GENERIC \ @@ -108,7 +143,7 @@ There are canned sequences for 'n' pointer values in registers. -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused) +INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); @@ -123,296 +158,24 @@ __stg_gc_enter_1 GC_GENERIC } -#if defined(GRAN) -/* - ToDo: merge the block and yield macros, calling something like BLOCK(N) - at the end; -*/ - -/* - Should we actually ever do a yield in such a case?? 
-- HWL -*/ -gran_yield_0 -{ - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -gran_yield_1 -{ - Sp_adj(-1); - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 2 Regs--------------------------------------------------------------------*/ - -gran_yield_2 -{ - Sp_adj(-2); - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 3 Regs -------------------------------------------------------------------*/ - -gran_yield_3 -{ - Sp_adj(-3); - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 4 Regs -------------------------------------------------------------------*/ - -gran_yield_4 -{ - Sp_adj(-4); - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 5 Regs -------------------------------------------------------------------*/ - -gran_yield_5 -{ - Sp_adj(-5); - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 6 Regs -------------------------------------------------------------------*/ - -gran_yield_6 -{ - Sp_adj(-6); - Sp(5) = R6; - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 7 Regs -------------------------------------------------------------------*/ - -gran_yield_7 -{ - Sp_adj(-7); - Sp(6) = R7; - Sp(5) = R6; - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -/*- 8 Regs -------------------------------------------------------------------*/ - -gran_yield_8 -{ - Sp_adj(-8); - Sp(7) = R8; - Sp(6) = R7; - Sp(5) = R6; - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadYielding; - jump StgReturn; -} - -// the same routines but with a block rather than a yield - -gran_block_1 -{ - Sp_adj(-1); - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 2 Regs--------------------------------------------------------------------*/ - -gran_block_2 -{ - Sp_adj(-2); - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 3 Regs -------------------------------------------------------------------*/ - -gran_block_3 -{ - Sp_adj(-3); - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 4 Regs -------------------------------------------------------------------*/ - -gran_block_4 -{ - Sp_adj(-4); - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 5 Regs -------------------------------------------------------------------*/ - -gran_block_5 -{ - Sp_adj(-5); - Sp(4) = R5; - Sp(3) = R4; - 
Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 6 Regs -------------------------------------------------------------------*/ - -gran_block_6 -{ - Sp_adj(-6); - Sp(5) = R6; - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 7 Regs -------------------------------------------------------------------*/ - -gran_block_7 -{ - Sp_adj(-7); - Sp(6) = R7; - Sp(5) = R6; - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -/*- 8 Regs -------------------------------------------------------------------*/ - -gran_block_8 -{ - Sp_adj(-8); - Sp(7) = R8; - Sp(6) = R7; - Sp(5) = R6; - Sp(4) = R5; - Sp(3) = R4; - Sp(2) = R3; - Sp(1) = R2; - Sp(0) = R1; - SAVE_THREAD_STATE(); - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; -} - -#endif - -#if 0 && defined(PAR) - -/* - Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the - saving of the thread state from the actual jump via an StgReturn. - We need this separation because we call RTS routines in blocking entry codes - before jumping back into the RTS (see parallel/FetchMe.hc). -*/ - -par_block_1_no_jump -{ - Sp_adj(-1); - Sp(0) = R1; - SAVE_THREAD_STATE(); -} +/* ----------------------------------------------------------------------------- + stg_enter_checkbh is just like stg_enter, except that we also call + checkBlockingQueues(). The point of this is that the GC can + replace an stg_marked_upd_frame with an stg_enter_checkbh if it + finds that the BLACKHOLE has already been updated by another + thread. It would be unsafe to use stg_enter, because there might + be an orphaned BLOCKING_QUEUE now. 
+ -------------------------------------------------------------------------- */ -par_jump +INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, P_ unused) { - TSO_what_next(CurrentTSO) = ThreadRunGHC; - R1 = ThreadBlocked; - jump StgReturn; + R1 = Sp(1); + Sp_adj(2); + foreign "C" checkBlockingQueues(MyCapability() "ptr", + CurrentTSO) [R1]; + ENTER(); } -#endif - /* ----------------------------------------------------------------------------- Heap checks in Primitive case alternatives @@ -438,7 +201,7 @@ INFO_TABLE_RET( stg_gc_void, RET_SMALL) /*-- R1 is boxed/unpointed -------------------------------------------------- */ -INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused) +INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); @@ -525,7 +288,7 @@ stg_gc_l1 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */ -INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused ) +INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused ) { Sp_adj(1); // one ptr is on the stack (Sp(0)) @@ -568,7 +331,7 @@ __stg_gc_fun W_ info; W_ type; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); // cache the size type = TO_W_(StgFunInfoExtra_fun_type(info)); @@ -579,7 +342,7 @@ __stg_gc_fun #ifdef TABLES_NEXT_TO_CODE // bitmap field holds an offset size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) - + %GET_ENTRY(R1) /* ### */ ); + + %GET_ENTRY(UNTAG(R1)) /* ### */ ); #else size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) ); #endif @@ -629,12 +392,12 @@ INFO_TABLE_RET( stg_gc_fun, RET_FUN ) #ifdef NO_ARG_REGS // Minor optimisation: there are no argument registers to load up, // so we can just jump straight to the function's entry point. - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; W_ type; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN || type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info); @@ -718,9 +481,13 @@ INFO_TABLE_RET( stg_gc_gen, RET_DYN ) stg_gc_gen { + // Hack; see Note [mvar-heap-check] in PrimOps.cmm + if (R10 == stg_putMVarzh || R10 == stg_takeMVarzh) { + unlockClosure(R1, stg_MVAR_DIRTY_info) + } SAVE_EVERYTHING; GC_GENERIC -} +} // A heap check at an unboxed tuple return point. 
The return address // is on the stack, and we can find it by using the offsets given @@ -810,19 +577,17 @@ stg_block_1 * * -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused ) +INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused ) { R1 = Sp(1); Sp_adj(2); - jump takeMVarzh_fast; + jump stg_takeMVarzh; } // code fragment executed just before we return to the scheduler stg_block_takemvar_finally { -#ifdef THREADED_RTS - unlockClosure(R3, stg_EMPTY_MVAR_info); -#endif + unlockClosure(R3, stg_MVAR_DIRTY_info); jump StgReturn; } @@ -835,20 +600,18 @@ stg_block_takemvar BLOCK_BUT_FIRST(stg_block_takemvar_finally); } -INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 ) +INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 ) { R2 = Sp(2); R1 = Sp(1); Sp_adj(3); - jump putMVarzh_fast; + jump stg_putMVarzh; } // code fragment executed just before we return to the scheduler stg_block_putmvar_finally { -#ifdef THREADED_RTS - unlockClosure(R3, stg_FULL_MVAR_info); -#endif + unlockClosure(R3, stg_MVAR_DIRTY_info); jump StgReturn; } @@ -862,39 +625,30 @@ stg_block_putmvar BLOCK_BUT_FIRST(stg_block_putmvar_finally); } -// code fragment executed just before we return to the scheduler -stg_block_blackhole_finally -{ -#if defined(THREADED_RTS) - // The last thing we do is release sched_lock, which is - // preventing other threads from accessing blackhole_queue and - // picking up this thread before we are finished with it. - foreign "C" RELEASE_LOCK(sched_mutex "ptr"); -#endif - jump StgReturn; -} - stg_block_blackhole { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_enter_info; - BLOCK_BUT_FIRST(stg_block_blackhole_finally); + BLOCK_GENERIC; } -INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused ) +INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused ) { R2 = Sp(2); R1 = Sp(1); Sp_adj(3); - jump killThreadzh_fast; + jump stg_killThreadzh; } stg_block_throwto_finally { -#ifdef THREADED_RTS - foreign "C" throwToReleaseTarget (R3 "ptr"); -#endif + // unlock the throwto message, but only if it wasn't already + // unlocked. It may have been unlocked if we revoked the message + // due to an exception being raised during threadPaused(). + if (StgHeader_info(StgTSO_block_info(CurrentTSO)) == stg_WHITEHOLE_info) { + unlockClosure(StgTSO_block_info(CurrentTSO), stg_MSG_THROWTO_info); + } jump StgReturn; } @@ -933,7 +687,7 @@ stg_block_async /* Used by threadDelay implementation; it would be desirable to get rid of * this free()'ing void return continuation. */ -INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL ) +INFO_TABLE_RET( stg_block_async_void, RET_SMALL ) { W_ ares;
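
The decision procedure packed into the new GC_GENERIC macro above can be
restated outside Cmm. What follows is a minimal C sketch of that logic only:
CheckResult, Regs, Block and gc_generic_decision are invented names for
illustration, the context_switch flag stands in for
Capability_context_switch(MyCapability()), and advancing the nursery pointer
stands in for the full CLOSE_NURSERY()/OPEN_NURSERY() bookkeeping.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative model only: the real code is Cmm operating on the
     * STG registers and on bdescr structures, not on this struct. */

    typedef enum {
        CONTINUE,         /* chained a fresh nursery block; resume thread  */
        THREAD_YIELDING,  /* HpLim zeroed, or context-switch flag was set  */
        HEAP_OVERFLOW,    /* nursery exhausted; scheduler should run a GC  */
        STACK_OVERFLOW    /* Hp <= HpLim: the stack check must have failed */
    } CheckResult;

    typedef struct Block { struct Block *link; } Block;   /* ~ bdescr */

    typedef struct {
        uintptr_t hp, hp_lim;   /* heap pointer and current block limit    */
        uintptr_t hp_alloc;     /* bytes requested by a failing heap check;
                                   zero at all other times                 */
        Block    *nursery;      /* ~ CurrentNursery                        */
        bool      context_switch;
        uintptr_t block_size;   /* ~ BLOCK_SIZE                            */
    } Regs;

    static CheckResult gc_generic_decision(Regs *r)
    {
        if (r->hp > r->hp_lim) {
            /* (1) retreat Hp so that no slop is left in the heap */
            r->hp -= r->hp_alloc;

            /* HpLim == 0: someone is forcing a context switch */
            if (r->hp_lim == 0)
                return THREAD_YIELDING;

            /* (2) small request, another block available: chain it on */
            if (r->hp_alloc <= r->block_size && r->nursery->link != NULL) {
                r->hp_alloc = 0;
                r->nursery  = r->nursery->link;  /* CLOSE/OPEN_NURSERY */
                return r->context_switch ? THREAD_YIELDING : CONTINUE;
            }

            /* (3) nursery exhausted: ask the scheduler for a GC */
            return HEAP_OVERFLOW;
        }
        /* Hp <= HpLim: only the stack check can have brought us here */
        return STACK_OVERFLOW;
    }

    int main(void)
    {
        Block b2 = { NULL }, b1 = { &b2 };
        Regs r = { .hp = 4096 + 16, .hp_lim = 4096, .hp_alloc = 16,
                   .nursery = &b1, .context_switch = false,
                   .block_size = 4096 };
        printf("%d\n", gc_generic_decision(&r));   /* 0 == CONTINUE */
        return 0;
    }

Note the ordering, which mirrors the header comment of the patch: Hp is
retreated before anything else so that no slop survives, HpLim == 0 is tested
before the nursery is consulted, and the StackOverflow verdict is reached
purely by elimination.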
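
The unlockClosure calls threaded through the patch (in the stg_gc_gen hack
and in the stg_block_takemvar_finally / stg_block_putmvar_finally fragments)
exist because a primop can hit a heap check, or return to the scheduler,
while it still holds the lock on an MVar, and a closure must never be left
locked across a GC or context switch. Below is a minimal C model of that
locking protocol, assuming the info-pointer-swap scheme the threaded RTS
uses; the dummy info tables and the struct layout are simplified for
illustration.

    #include <stdatomic.h>
    #include <stdio.h>

    /* Dummy "info tables": only their addresses matter in this model. */
    static const char WHITEHOLE_info, MVAR_CLEAN_info, MVAR_DIRTY_info;

    typedef struct {
        _Atomic(const void *) info;  /* closure header: info-table pointer */
        /* ... value, queue of blocked TSOs, ... */
    } Closure;

    /* Take the lock: swap the info pointer for WHITEHOLE and spin while
     * some other capability already holds it. */
    static const void *lock_closure(Closure *c)
    {
        for (;;) {
            const void *info = atomic_exchange(&c->info, &WHITEHOLE_info);
            if (info != &WHITEHOLE_info)
                return info;          /* we own the closure now */
        }
    }

    /* Release the lock by publishing a valid info pointer again. */
    static void unlock_closure(Closure *c, const void *info)
    {
        atomic_store(&c->info, info);
    }

    int main(void)
    {
        Closure mvar = { &MVAR_CLEAN_info };
        (void) lock_closure(&mvar);   /* takeMVar#/putMVar# lock the MVar */
        /* ... a heap check fails here; before jumping to the scheduler
         * the closure must not stay locked, so publish MVAR_DIRTY: */
        unlock_closure(&mvar, &MVAR_DIRTY_info);
        printf("%s\n", atomic_load(&mvar.info) == &MVAR_DIRTY_info
                           ? "unlocked, dirty" : "still locked");
        return 0;
    }

Publishing stg_MVAR_DIRTY_info rather than the original info pointer both
releases the lock and marks the MVar as dirty (already on the capability's
mutable list), since the primop may have written to it before bailing out.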