1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
16 import pthread_mutex_unlock;
18 import EnterCriticalSection;
19 import LeaveCriticalSection;
21 /* Stack/Heap Check Failure
22 * ------------------------
24 * On discovering that a stack or heap check has failed, we do the following:
26 * - If HpLim==0, indicating that we should context-switch, we yield
27 * to the scheduler (return ThreadYielding).
29 * Note that we must leave no slop in the heap (this is a requirement
30 * for LDV profiling, at least), so if we just had a heap-check
31 * failure, then we must retract Hp by HpAlloc. How do we know
32 * whether there was a heap-check failure? HpLim might be zero, and
33 * yet we got here as a result of a stack-check failure. Hence, we
34 * require that HpAlloc is only non-zero if there was a heap-check
35 * failure, otherwise it is zero, so we can always safely subtract
38 * Hence, HpAlloc is zeroed in LOAD_THREAD_STATE().
40 * - If the context_switch flag is set (the backup plan if setting HpLim
41 * to 0 didn't trigger a context switch), we yield to the scheduler
42 * (return ThreadYielding).
44 * - If Hp > HpLim, we've had a heap check failure. This means we've
45 * come to the end of the current heap block, so we try to chain
46 * another block on with ExtendNursery().
48 * - If this succeeds, we carry on without returning to the
51 * - If it fails, we return to the scheduler claiming HeapOverflow
52 * so that a garbage collection can be performed.
54 * - If Hp <= HpLim, it must have been a stack check that failed. In
55 * which case, we return to the scheduler claiming StackOverflow, the
56 * scheduler will either increase the size of our stack, or raise
57 * an exception if the stack is already too big.
59 * The effect of checking for context switch only in the heap/stack check
60 * failure code is that we'll switch threads after the current thread has
61 * reached the end of its heap block. If a thread isn't allocating
62 * at all, it won't yield. Hopefully this won't be a problem in practice.
65 #define PRE_RETURN(why,what_next) \
66 StgTSO_what_next(CurrentTSO) = what_next::I16; \
67 StgRegTable_rRet(BaseReg) = why; \
70 /* Remember that the return address is *removed* when returning to a
71 * ThreadRunGHC thread.
75 DEBUG_ONLY(foreign "C" heapCheckFail()); \
77 Hp = Hp - HpAlloc/*in bytes*/; \
79 R1 = ThreadYielding; \
82 if (HpAlloc <= BLOCK_SIZE \
83 && bdescr_link(CurrentNursery) != NULL) { \
86 CurrentNursery = bdescr_link(CurrentNursery); \
88 if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \
89 R1 = ThreadYielding; \
92 jump %ENTRY_CODE(Sp(0)); \
102 PRE_RETURN(R1,ThreadRunGHC); \
103 jump stg_returnToSched;
106 PRE_RETURN(HeapOverflow, ThreadRunGHC) \
107 jump stg_returnToSched;
/* Return to the scheduler with the TSO marked ThreadBlocked: sets
 * what_next to ThreadRunGHC and rRet to ThreadBlocked via PRE_RETURN,
 * then jumps to stg_returnToSched. Presumably the caller has already
 * queued the TSO on whatever it is blocked on — confirm at call sites. */
109 #define BLOCK_GENERIC \
110 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
111 jump stg_returnToSched;
/* Return to the scheduler with the TSO marked ThreadYielding: sets
 * what_next to ThreadRunGHC and rRet to ThreadYielding via PRE_RETURN,
 * then jumps to stg_returnToSched (used for voluntary context switches,
 * e.g. when HpLim==0 or the context_switch flag is set — see the header
 * comment above). */
113 #define YIELD_GENERIC \
114 PRE_RETURN(ThreadYielding, ThreadRunGHC) \
115 jump stg_returnToSched;
117 #define BLOCK_BUT_FIRST(c) \
118 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
120 jump stg_returnToSchedButFirst;
/* Yield with what_next set to ThreadInterpret, so the scheduler resumes
 * this thread in the byte-code interpreter rather than in compiled code.
 * Returns via stg_returnToSchedNotPaused — NOTE(review): the "NotPaused"
 * variant presumably skips the usual pause bookkeeping of
 * stg_returnToSched; confirm against the stg_returnToSched* definitions. */
122 #define YIELD_TO_INTERPRETER \
123 PRE_RETURN(ThreadYielding, ThreadInterpret) \
124 jump stg_returnToSchedNotPaused;
126 /* -----------------------------------------------------------------------------
127 Heap checks in thunks/functions.
129 In these cases, node always points to the function closure. This gives
130 us an easy way to return to the function: just leave R1 on the top of
131 the stack, and have the scheduler enter it to return.
133 There are canned sequences for 'n' pointer values in registers.
134 -------------------------------------------------------------------------- */
136 INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused)
147 Sp(0) = stg_enter_info;
153 ToDo: merge the block and yield macros, calling something like BLOCK(N)
158 Should we actually ever do a yield in such a case?? -- HWL
163 TSO_what_next(CurrentTSO) = ThreadRunGHC;
173 TSO_what_next(CurrentTSO) = ThreadRunGHC;
178 /*- 2 Regs--------------------------------------------------------------------*/
186 TSO_what_next(CurrentTSO) = ThreadRunGHC;
191 /*- 3 Regs -------------------------------------------------------------------*/
200 TSO_what_next(CurrentTSO) = ThreadRunGHC;
205 /*- 4 Regs -------------------------------------------------------------------*/
215 TSO_what_next(CurrentTSO) = ThreadRunGHC;
220 /*- 5 Regs -------------------------------------------------------------------*/
231 TSO_what_next(CurrentTSO) = ThreadRunGHC;
236 /*- 6 Regs -------------------------------------------------------------------*/
248 TSO_what_next(CurrentTSO) = ThreadRunGHC;
253 /*- 7 Regs -------------------------------------------------------------------*/
266 TSO_what_next(CurrentTSO) = ThreadRunGHC;
271 /*- 8 Regs -------------------------------------------------------------------*/
285 TSO_what_next(CurrentTSO) = ThreadRunGHC;
290 // the same routines but with a block rather than a yield
297 TSO_what_next(CurrentTSO) = ThreadRunGHC;
302 /*- 2 Regs--------------------------------------------------------------------*/
310 TSO_what_next(CurrentTSO) = ThreadRunGHC;
315 /*- 3 Regs -------------------------------------------------------------------*/
324 TSO_what_next(CurrentTSO) = ThreadRunGHC;
329 /*- 4 Regs -------------------------------------------------------------------*/
339 TSO_what_next(CurrentTSO) = ThreadRunGHC;
344 /*- 5 Regs -------------------------------------------------------------------*/
355 TSO_what_next(CurrentTSO) = ThreadRunGHC;
360 /*- 6 Regs -------------------------------------------------------------------*/
372 TSO_what_next(CurrentTSO) = ThreadRunGHC;
377 /*- 7 Regs -------------------------------------------------------------------*/
390 TSO_what_next(CurrentTSO) = ThreadRunGHC;
395 /*- 8 Regs -------------------------------------------------------------------*/
409 TSO_what_next(CurrentTSO) = ThreadRunGHC;
416 #if 0 && defined(PAR)
419 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
420 saving of the thread state from the actual jump via an StgReturn.
421 We need this separation because we call RTS routines in blocking entry codes
422 before jumping back into the RTS (see parallel/FetchMe.hc).
434 TSO_what_next(CurrentTSO) = ThreadRunGHC;
441 /* -----------------------------------------------------------------------------
442 Heap checks in Primitive case alternatives
444 A primitive case alternative is entered with a value either in
445 R1, FloatReg1 or D1 depending on the return convention. All the
446 cases are covered below.
447 -------------------------------------------------------------------------- */
449 /*-- No Registers live ------------------------------------------------------ */
456 /*-- void return ------------------------------------------------------------ */
458 INFO_TABLE_RET( stg_gc_void, RET_SMALL)
461 jump %ENTRY_CODE(Sp(0));
464 /*-- R1 is boxed/unpointed -------------------------------------------------- */
466 INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused)
470 jump %ENTRY_CODE(Sp(0));
477 Sp(0) = stg_gc_unpt_r1_info;
481 /*-- R1 is unboxed -------------------------------------------------- */
483 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
484 INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
488 jump %ENTRY_CODE(Sp(0));
495 Sp(0) = stg_gc_unbx_r1_info;
499 /*-- F1 contains a float ------------------------------------------------- */
501 INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
505 jump %ENTRY_CODE(Sp(0));
511 F_[Sp + WDS(1)] = F1;
512 Sp(0) = stg_gc_f1_info;
516 /*-- D1 contains a double ------------------------------------------------- */
518 INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
520 D1 = D_[Sp + WDS(1)];
521 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
522 jump %ENTRY_CODE(Sp(0));
527 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
528 D_[Sp + WDS(1)] = D1;
529 Sp(0) = stg_gc_d1_info;
534 /*-- L1 contains an int64 ------------------------------------------------- */
536 INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
538 L1 = L_[Sp + WDS(1)];
539 Sp_adj(1) + SIZEOF_StgWord64;
540 jump %ENTRY_CODE(Sp(0));
545 Sp_adj(-1) - SIZEOF_StgWord64;
546 L_[Sp + WDS(1)] = L1;
547 Sp(0) = stg_gc_l1_info;
551 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
553 INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused )
556 // one ptr is on the stack (Sp(0))
557 jump %ENTRY_CODE(Sp(1));
560 /* -----------------------------------------------------------------------------
561 Generic function entry heap check code.
563 At a function entry point, the arguments are as per the calling convention,
564 i.e. some in regs and some on the stack. There may or may not be
565 a pointer to the function closure in R1 - if there isn't, then the heap
566 check failure code in the function will arrange to load it.
568 The function's argument types are described in its info table, so we
569 can just jump to this bit of generic code to save away all the
570 registers and return to the scheduler.
572 This code arranges the stack like this:
576 +---------------------+
578 +---------------------+
580 +---------------------+
582 +---------------------+
584 The size is the number of words of arguments on the stack, and is cached
585 in the frame in order to simplify stack walking: otherwise the size of
586 this stack frame would have to be calculated by looking at f's info table.
588 -------------------------------------------------------------------------- */
596 info = %GET_FUN_INFO(UNTAG(R1));
599 type = TO_W_(StgFunInfoExtra_fun_type(info));
600 if (type == ARG_GEN) {
601 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
603 if (type == ARG_GEN_BIG) {
604 #ifdef TABLES_NEXT_TO_CODE
605 // bitmap field holds an offset
606 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
607 + %GET_ENTRY(UNTAG(R1)) /* ### */ );
609 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
612 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
617 // we don't have to save any registers away
621 Sp(0) = stg_gc_fun_info;
625 type = TO_W_(StgFunInfoExtra_fun_type(info));
627 if (type == ARG_GEN || type == ARG_GEN_BIG) {
628 // regs already saved by the heap check code
632 Sp(0) = stg_gc_fun_info;
633 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
636 jump W_[stg_stack_save_entries + WDS(type)];
637 // jumps to stg_gc_noregs after saving stuff
639 #endif /* !NO_ARG_REGS */
642 /* -----------------------------------------------------------------------------
643 Generic Apply (return point)
645 The dual to stg_fun_gc_gen (above): this fragment returns to the
646 function, passing arguments in the stack and in registers
647 appropriately. The stack layout is given above.
648 -------------------------------------------------------------------------- */
650 INFO_TABLE_RET( stg_gc_fun, RET_FUN )
655 // Minor optimisation: there are no argument registers to load up,
656 // so we can just jump straight to the function's entry point.
657 jump %GET_ENTRY(UNTAG(R1));
662 info = %GET_FUN_INFO(UNTAG(R1));
663 type = TO_W_(StgFunInfoExtra_fun_type(info));
664 if (type == ARG_GEN || type == ARG_GEN_BIG) {
665 jump StgFunInfoExtra_slow_apply(info);
667 if (type == ARG_BCO) {
668 // cover this case just to be on the safe side
671 Sp(0) = stg_apply_interp_info;
672 jump stg_yield_to_interpreter;
674 jump W_[stg_ap_stack_entries + WDS(type)];
680 /* -----------------------------------------------------------------------------
681 Generic Heap Check Code.
683 Called with Liveness mask in R9, Return address in R10.
684 Stack must be consistent (containing all necessary info pointers
687 See StgMacros.h for a description of the RET_DYN stack frame.
689 We also define an stg_gen_yield here, because it's very similar.
690 -------------------------------------------------------------------------- */
692 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
693 // on a 64-bit machine, we'll end up wasting a couple of words, but
694 // it's not a big deal.
696 #define RESTORE_EVERYTHING \
697 L1 = L_[Sp + WDS(19)]; \
698 D2 = D_[Sp + WDS(17)]; \
699 D1 = D_[Sp + WDS(15)]; \
700 F4 = F_[Sp + WDS(14)]; \
701 F3 = F_[Sp + WDS(13)]; \
702 F2 = F_[Sp + WDS(12)]; \
703 F1 = F_[Sp + WDS(11)]; \
714 #define RET_OFFSET (-19)
716 #define SAVE_EVERYTHING \
718 L_[Sp + WDS(19)] = L1; \
719 D_[Sp + WDS(17)] = D2; \
720 D_[Sp + WDS(15)] = D1; \
721 F_[Sp + WDS(14)] = F4; \
722 F_[Sp + WDS(13)] = F3; \
723 F_[Sp + WDS(12)] = F2; \
724 F_[Sp + WDS(11)] = F1; \
733 Sp(2) = R10; /* return address */ \
734 Sp(1) = R9; /* liveness mask */ \
735 Sp(0) = stg_gc_gen_info;
737 INFO_TABLE_RET( stg_gc_gen, RET_DYN )
738 /* bitmap in the above info table is unused, the real one is on the stack. */
741 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
750 // A heap check at an unboxed tuple return point. The return address
751 // is on the stack, and we can find it by using the offsets given
752 // to us in the liveness mask.
755 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
761 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
762 * because we've just failed doYouWantToGC(), not a standard heap
763 * check. GC_GENERIC would end up returning StackOverflow.
771 /* -----------------------------------------------------------------------------
773 -------------------------------------------------------------------------- */
786 /* -----------------------------------------------------------------------------
787 Yielding to the interpreter... top of stack says what to do next.
788 -------------------------------------------------------------------------- */
790 stg_yield_to_interpreter
792 YIELD_TO_INTERPRETER;
795 /* -----------------------------------------------------------------------------
797 -------------------------------------------------------------------------- */
814 Sp(0) = stg_enter_info;
818 /* -----------------------------------------------------------------------------
819 * takeMVar/putMVar-specific blocks
821 * Stack layout for a thread blocked in takeMVar:
825 * stg_block_takemvar_info
827 * Stack layout for a thread blocked in putMVar:
832 * stg_block_putmvar_info
834 * See PrimOps.hc for a description of the workings of take/putMVar.
836 * -------------------------------------------------------------------------- */
838 INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused )
842 jump takeMVarzh_fast;
845 // code fragment executed just before we return to the scheduler
846 stg_block_takemvar_finally
849 unlockClosure(R3, stg_MVAR_DIRTY_info);
851 SET_INFO(R3, stg_MVAR_DIRTY_info);
860 Sp(0) = stg_block_takemvar_info;
862 BLOCK_BUT_FIRST(stg_block_takemvar_finally);
865 INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 )
873 // code fragment executed just before we return to the scheduler
874 stg_block_putmvar_finally
877 unlockClosure(R3, stg_MVAR_DIRTY_info);
879 SET_INFO(R3, stg_MVAR_DIRTY_info);
889 Sp(0) = stg_block_putmvar_info;
891 BLOCK_BUT_FIRST(stg_block_putmvar_finally);
894 // code fragment executed just before we return to the scheduler
895 stg_block_blackhole_finally
897 #if defined(THREADED_RTS)
898 // The last thing we do is release sched_lock, which is
899 // preventing other threads from accessing blackhole_queue and
900 // picking up this thread before we are finished with it.
901 RELEASE_LOCK(sched_mutex "ptr");
910 Sp(0) = stg_enter_info;
911 BLOCK_BUT_FIRST(stg_block_blackhole_finally);
914 INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused )
919 jump killThreadzh_fast;
922 stg_block_throwto_finally
925 foreign "C" throwToReleaseTarget (R3 "ptr");
935 Sp(0) = stg_block_throwto_info;
936 BLOCK_BUT_FIRST(stg_block_throwto_finally);
939 #ifdef mingw32_HOST_OS
940 INFO_TABLE_RET( stg_block_async, RET_SMALL )
945 ares = StgTSO_block_info(CurrentTSO);
946 len = StgAsyncIOResult_len(ares);
947 errC = StgAsyncIOResult_errCode(ares);
948 StgTSO_block_info(CurrentTSO) = NULL;
949 foreign "C" free(ares "ptr");
952 jump %ENTRY_CODE(Sp(1));
958 Sp(0) = stg_block_async_info;
962 /* Used by threadDelay implementation; it would be desirable to get rid of
963 * this free()'ing void return continuation.
965 INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
969 ares = StgTSO_block_info(CurrentTSO);
970 StgTSO_block_info(CurrentTSO) = NULL;
971 foreign "C" free(ares "ptr");
973 jump %ENTRY_CODE(Sp(0));
979 Sp(0) = stg_block_async_void_info;
985 /* -----------------------------------------------------------------------------
987 -------------------------------------------------------------------------- */
989 stg_block_stmwait_finally
991 foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
997 BLOCK_BUT_FIRST(stg_block_stmwait_finally);