/* ----------------------------------------------------------------------------- * * (c) The GHC Team, 1998-2004 * * Canned Heap-Check and Stack-Check sequences. * * This file is written in a subset of C--, extended with various * features specific to GHC. It is compiled by GHC directly. For the * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y. * * ---------------------------------------------------------------------------*/ #include "Cmm.h" #ifdef __PIC__ import pthread_mutex_unlock; #endif import EnterCriticalSection; import LeaveCriticalSection; /* Stack/Heap Check Failure * ------------------------ * * Both heap and stack check failures end up in the same place, so * that we can share the code for the failure case when a proc needs * both a stack check and a heap check (a common case). * * So when we get here, we have to tell the difference between a stack * check failure and a heap check failure. The code for the checks * looks like this: if (Sp - 16 < SpLim) goto c1Tf; Hp = Hp + 16; if (Hp > HpLim) goto c1Th; ... c1Th: HpAlloc = 16; goto c1Tf; c1Tf: jump stg_gc_enter_1 (); * Note that Sp is not decremented by the check, whereas Hp is. The * reasons for this seem to be largely historic, I can't think of a * good reason not to decrement Sp at the check too. (--SDM) * * Note that HpLim may be set to zero arbitrarily by the timer signal * or another processor to trigger a context switch via heap check * failure. * * The job of these fragments (stg_gc_enter_1 and friends) is to * 1. Leave no slop in the heap, so Hp must be retreated if it was * incremented by the check. No-slop is a requirement for LDV * profiling, at least. * 2. If a heap check failed, try to grab another heap block from * the nursery and continue. * 3. otherwise, return to the scheduler with StackOverflow, * HeapOverflow, or ThreadYielding as appropriate. * * We can tell whether Hp was incremented, because HpAlloc is * non-zero: HpAlloc is required to be zero at all times unless a * heap-check just failed, which is why the stack-check failure case * does not set HpAlloc (see code fragment above). So that covers (1). * HpAlloc is zeroed in LOAD_THREAD_STATE(). * * If Hp > HpLim, then either (a) we have reached the end of the * current heap block, or (b) HpLim == 0 and we should yield. Hence * check Hp > HpLim first, and then HpLim == 0 to decide whether to * return ThreadYielding or try to grab another heap block from the * nursery. * * If Hp <= HpLim, then this must be a StackOverflow. The scheduler * will either increase the size of our stack, or raise an exception if * the stack is already too big. */ #define PRE_RETURN(why,what_next) \ StgTSO_what_next(CurrentTSO) = what_next::I16; \ StgRegTable_rRet(BaseReg) = why; \ R1 = BaseReg; /* Remember that the return address is *removed* when returning to a * ThreadRunGHC thread. */ #define GC_GENERIC \ DEBUG_ONLY(foreign "C" heapCheckFail()); \ if (Hp > HpLim) { \ Hp = Hp - HpAlloc/*in bytes*/; \ if (HpLim == 0) { \ R1 = ThreadYielding; \ goto sched; \ } \ if (HpAlloc <= BLOCK_SIZE \ && bdescr_link(CurrentNursery) != NULL) { \ HpAlloc = 0; \ CLOSE_NURSERY(); \ CurrentNursery = bdescr_link(CurrentNursery); \ OPEN_NURSERY(); \ if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \ R1 = ThreadYielding; \ goto sched; \ } else { \ jump %ENTRY_CODE(Sp(0)); \ } \ } else { \ R1 = HeapOverflow; \ goto sched; \ } \ } else { \ R1 = StackOverflow; \ } \ sched: \ PRE_RETURN(R1,ThreadRunGHC); \ jump stg_returnToSched; #define HP_GENERIC \ PRE_RETURN(HeapOverflow, ThreadRunGHC) \ jump stg_returnToSched; #define BLOCK_GENERIC \ PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ jump stg_returnToSched; #define YIELD_GENERIC \ PRE_RETURN(ThreadYielding, ThreadRunGHC) \ jump stg_returnToSched; #define BLOCK_BUT_FIRST(c) \ PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ R2 = c; \ jump stg_returnToSchedButFirst; #define YIELD_TO_INTERPRETER \ PRE_RETURN(ThreadYielding, ThreadInterpret) \ jump stg_returnToSchedNotPaused; /* ----------------------------------------------------------------------------- Heap checks in thunks/functions. In these cases, node always points to the function closure. This gives us an easy way to return to the function: just leave R1 on the top of the stack, and have the scheduler enter it to return. There are canned sequences for 'n' pointer values in registers. -------------------------------------------------------------------------- */ INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); ENTER(); } __stg_gc_enter_1 { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_enter_info; GC_GENERIC } /* ----------------------------------------------------------------------------- stg_enter_checkbh is just like stg_enter, except that we also call checkBlockingQueues(). The point of this is that the GC can replace an stg_marked_upd_frame with an stg_enter_checkbh if it finds that the BLACKHOLE has already been updated by another thread. It would be unsafe to use stg_enter, because there might be an orphaned BLOCKING_QUEUE now. -------------------------------------------------------------------------- */ INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); foreign "C" checkBlockingQueues(MyCapability() "ptr", CurrentTSO) [R1]; ENTER(); } /* ----------------------------------------------------------------------------- Heap checks in Primitive case alternatives A primitive case alternative is entered with a value either in R1, FloatReg1 or D1 depending on the return convention. All the cases are covered below. -------------------------------------------------------------------------- */ /*-- No Registers live ------------------------------------------------------ */ stg_gc_noregs { GC_GENERIC } /*-- void return ------------------------------------------------------------ */ INFO_TABLE_RET( stg_gc_void, RET_SMALL) { Sp_adj(1); jump %ENTRY_CODE(Sp(0)); } /*-- R1 is boxed/unpointed -------------------------------------------------- */ INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); jump %ENTRY_CODE(Sp(0)); } stg_gc_unpt_r1 { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_gc_unpt_r1_info; GC_GENERIC } /*-- R1 is unboxed -------------------------------------------------- */ /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */ INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused ) { R1 = Sp(1); Sp_adj(2); jump %ENTRY_CODE(Sp(0)); } stg_gc_unbx_r1 { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_gc_unbx_r1_info; GC_GENERIC } /*-- F1 contains a float ------------------------------------------------- */ INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused ) { F1 = F_[Sp+WDS(1)]; Sp_adj(2); jump %ENTRY_CODE(Sp(0)); } stg_gc_f1 { Sp_adj(-2); F_[Sp + WDS(1)] = F1; Sp(0) = stg_gc_f1_info; GC_GENERIC } /*-- D1 contains a double ------------------------------------------------- */ INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused ) { D1 = D_[Sp + WDS(1)]; Sp = Sp + WDS(1) + SIZEOF_StgDouble; jump %ENTRY_CODE(Sp(0)); } stg_gc_d1 { Sp = Sp - WDS(1) - SIZEOF_StgDouble; D_[Sp + WDS(1)] = D1; Sp(0) = stg_gc_d1_info; GC_GENERIC } /*-- L1 contains an int64 ------------------------------------------------- */ INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused ) { L1 = L_[Sp + WDS(1)]; Sp_adj(1) + SIZEOF_StgWord64; jump %ENTRY_CODE(Sp(0)); } stg_gc_l1 { Sp_adj(-1) - SIZEOF_StgWord64; L_[Sp + WDS(1)] = L1; Sp(0) = stg_gc_l1_info; GC_GENERIC } /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */ INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused ) { Sp_adj(1); // one ptr is on the stack (Sp(0)) jump %ENTRY_CODE(Sp(1)); } /* ----------------------------------------------------------------------------- Generic function entry heap check code. At a function entry point, the arguments are as per the calling convention, i.e. some in regs and some on the stack. There may or may not be a pointer to the function closure in R1 - if there isn't, then the heap check failure code in the function will arrange to load it. The function's argument types are described in its info table, so we can just jump to this bit of generic code to save away all the registers and return to the scheduler. This code arranges the stack like this: | .... | | args | +---------------------+ | f_closure | +---------------------+ | size | +---------------------+ | stg_gc_fun_info | +---------------------+ The size is the number of words of arguments on the stack, and is cached in the frame in order to simplify stack walking: otherwise the size of this stack frame would have to be calculated by looking at f's info table. -------------------------------------------------------------------------- */ __stg_gc_fun { W_ size; W_ info; W_ type; info = %GET_FUN_INFO(UNTAG(R1)); // cache the size type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info)); } else { if (type == ARG_GEN_BIG) { #ifdef TABLES_NEXT_TO_CODE // bitmap field holds an offset size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) + %GET_ENTRY(UNTAG(R1)) /* ### */ ); #else size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) ); #endif } else { size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]); } } #ifdef NO_ARG_REGS // we don't have to save any registers away Sp_adj(-3); Sp(2) = R1; Sp(1) = size; Sp(0) = stg_gc_fun_info; GC_GENERIC #else W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); // cache the size if (type == ARG_GEN || type == ARG_GEN_BIG) { // regs already saved by the heap check code Sp_adj(-3); Sp(2) = R1; Sp(1) = size; Sp(0) = stg_gc_fun_info; // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)");); GC_GENERIC } else { jump W_[stg_stack_save_entries + WDS(type)]; // jumps to stg_gc_noregs after saving stuff } #endif /* !NO_ARG_REGS */ } /* ----------------------------------------------------------------------------- Generic Apply (return point) The dual to stg_fun_gc_gen (above): this fragment returns to the function, passing arguments in the stack and in registers appropriately. The stack layout is given above. -------------------------------------------------------------------------- */ INFO_TABLE_RET( stg_gc_fun, RET_FUN ) { R1 = Sp(2); Sp_adj(3); #ifdef NO_ARG_REGS // Minor optimisation: there are no argument registers to load up, // so we can just jump straight to the function's entry point. jump %GET_ENTRY(UNTAG(R1)); #else W_ info; W_ type; info = %GET_FUN_INFO(UNTAG(R1)); type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN || type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info); } else { if (type == ARG_BCO) { // cover this case just to be on the safe side Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; jump stg_yield_to_interpreter; } else { jump W_[stg_ap_stack_entries + WDS(type)]; } } #endif } /* ----------------------------------------------------------------------------- Generic Heap Check Code. Called with Liveness mask in R9, Return address in R10. Stack must be consistent (containing all necessary info pointers to relevant SRTs). See StgMacros.h for a description of the RET_DYN stack frame. We also define an stg_gen_yield here, because it's very similar. -------------------------------------------------------------------------- */ // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P // on a 64-bit machine, we'll end up wasting a couple of words, but // it's not a big deal. #define RESTORE_EVERYTHING \ L1 = L_[Sp + WDS(19)]; \ D2 = D_[Sp + WDS(17)]; \ D1 = D_[Sp + WDS(15)]; \ F4 = F_[Sp + WDS(14)]; \ F3 = F_[Sp + WDS(13)]; \ F2 = F_[Sp + WDS(12)]; \ F1 = F_[Sp + WDS(11)]; \ R8 = Sp(10); \ R7 = Sp(9); \ R6 = Sp(8); \ R5 = Sp(7); \ R4 = Sp(6); \ R3 = Sp(5); \ R2 = Sp(4); \ R1 = Sp(3); \ Sp_adj(21); #define RET_OFFSET (-19) #define SAVE_EVERYTHING \ Sp_adj(-21); \ L_[Sp + WDS(19)] = L1; \ D_[Sp + WDS(17)] = D2; \ D_[Sp + WDS(15)] = D1; \ F_[Sp + WDS(14)] = F4; \ F_[Sp + WDS(13)] = F3; \ F_[Sp + WDS(12)] = F2; \ F_[Sp + WDS(11)] = F1; \ Sp(10) = R8; \ Sp(9) = R7; \ Sp(8) = R6; \ Sp(7) = R5; \ Sp(6) = R4; \ Sp(5) = R3; \ Sp(4) = R2; \ Sp(3) = R1; \ Sp(2) = R10; /* return address */ \ Sp(1) = R9; /* liveness mask */ \ Sp(0) = stg_gc_gen_info; INFO_TABLE_RET( stg_gc_gen, RET_DYN ) /* bitmap in the above info table is unused, the real one is on the stack. */ { RESTORE_EVERYTHING; jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */ } stg_gc_gen { // Hack; see Note [mvar-heap-check] in PrimOps.cmm if (R10 == stg_putMVarzh || R10 == stg_takeMVarzh) { unlockClosure(R1, stg_MVAR_DIRTY_info) } SAVE_EVERYTHING; GC_GENERIC } // A heap check at an unboxed tuple return point. The return address // is on the stack, and we can find it by using the offsets given // to us in the liveness mask. stg_gc_ut { R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9))); SAVE_EVERYTHING; GC_GENERIC } /* * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC * because we've just failed doYouWantToGC(), not a standard heap * check. GC_GENERIC would end up returning StackOverflow. */ stg_gc_gen_hp { SAVE_EVERYTHING; HP_GENERIC } /* ----------------------------------------------------------------------------- Yields -------------------------------------------------------------------------- */ stg_gen_yield { SAVE_EVERYTHING; YIELD_GENERIC } stg_yield_noregs { YIELD_GENERIC; } /* ----------------------------------------------------------------------------- Yielding to the interpreter... top of stack says what to do next. -------------------------------------------------------------------------- */ stg_yield_to_interpreter { YIELD_TO_INTERPRETER; } /* ----------------------------------------------------------------------------- Blocks -------------------------------------------------------------------------- */ stg_gen_block { SAVE_EVERYTHING; BLOCK_GENERIC; } stg_block_noregs { BLOCK_GENERIC; } stg_block_1 { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_enter_info; BLOCK_GENERIC; } /* ----------------------------------------------------------------------------- * takeMVar/putMVar-specific blocks * * Stack layout for a thread blocked in takeMVar: * * ret. addr * ptr to MVar (R1) * stg_block_takemvar_info * * Stack layout for a thread blocked in putMVar: * * ret. addr * ptr to Value (R2) * ptr to MVar (R1) * stg_block_putmvar_info * * See PrimOps.hc for a description of the workings of take/putMVar. * * -------------------------------------------------------------------------- */ INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused ) { R1 = Sp(1); Sp_adj(2); jump stg_takeMVarzh; } // code fragment executed just before we return to the scheduler stg_block_takemvar_finally { unlockClosure(R3, stg_MVAR_DIRTY_info); jump StgReturn; } stg_block_takemvar { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_block_takemvar_info; R3 = R1; BLOCK_BUT_FIRST(stg_block_takemvar_finally); } INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 ) { R2 = Sp(2); R1 = Sp(1); Sp_adj(3); jump stg_putMVarzh; } // code fragment executed just before we return to the scheduler stg_block_putmvar_finally { unlockClosure(R3, stg_MVAR_DIRTY_info); jump StgReturn; } stg_block_putmvar { Sp_adj(-3); Sp(2) = R2; Sp(1) = R1; Sp(0) = stg_block_putmvar_info; R3 = R1; BLOCK_BUT_FIRST(stg_block_putmvar_finally); } stg_block_blackhole { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_enter_info; BLOCK_GENERIC; } INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused ) { R2 = Sp(2); R1 = Sp(1); Sp_adj(3); jump stg_killThreadzh; } stg_block_throwto_finally { // unlock the throwto message, but only if it wasn't already // unlocked. It may have been unlocked if we revoked the message // due to an exception being raised during threadPaused(). if (StgHeader_info(StgTSO_block_info(CurrentTSO)) == stg_WHITEHOLE_info) { unlockClosure(StgTSO_block_info(CurrentTSO), stg_MSG_THROWTO_info); } jump StgReturn; } stg_block_throwto { Sp_adj(-3); Sp(2) = R2; Sp(1) = R1; Sp(0) = stg_block_throwto_info; BLOCK_BUT_FIRST(stg_block_throwto_finally); } #ifdef mingw32_HOST_OS INFO_TABLE_RET( stg_block_async, RET_SMALL, W_ unused ) { W_ ares; W_ len, errC; ares = Sp(1); len = StgAsyncIOResult_len(ares); errC = StgAsyncIOResult_errCode(ares); foreign "C" free(ares "ptr"); R1 = len; Sp_adj(1); Sp(0) = errC; jump %ENTRY_CODE(Sp(1)); } stg_block_async { Sp_adj(-2); Sp(0) = stg_block_async_info; BLOCK_GENERIC; } /* Used by threadDelay implementation; it would be desirable to get rid of * this free()'ing void return continuation. */ INFO_TABLE_RET( stg_block_async_void, RET_SMALL, W_ ares ) { W_ ares; ares = Sp(1); foreign "C" free(ares "ptr"); Sp_adj(2); jump %ENTRY_CODE(Sp(0)); } stg_block_async_void { Sp_adj(-2); Sp(0) = stg_block_async_void_info; BLOCK_GENERIC; } #endif /* ----------------------------------------------------------------------------- STM-specific waiting -------------------------------------------------------------------------- */ stg_block_stmwait_finally { foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr"); jump StgReturn; } stg_block_stmwait { BLOCK_BUT_FIRST(stg_block_stmwait_finally); }