/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
#ifdef __PIC__
import pthread_mutex_unlock;
#endif
import EnterCriticalSection;
import LeaveCriticalSection;
/* Stack/Heap Check Failure
 * ------------------------
 *
 * On discovering that a stack or heap check has failed, we do the following:
 *
 *    - If the context_switch flag is set, indicating that there are more
 *      threads waiting to run, we yield to the scheduler
 *      (return ThreadYielding).
 *
 *    - If Hp > HpLim, we've had a heap check failure.  This means we've
 *      come to the end of the current heap block, so we try to chain
 *      another block on with ExtendNursery().
 *
 *          - If this succeeds, we carry on without returning to the
 *            scheduler.
 *
 *          - If it fails, we return to the scheduler claiming HeapOverflow
 *            so that a garbage collection can be performed.
 *    - If Hp <= HpLim, it must have been a stack check that failed, in
 *      which case we return to the scheduler claiming StackOverflow; the
 *      scheduler will either increase the size of our stack, or raise
 *      an exception if the stack is already too big.
 * The effect of checking for context switch only in the heap/stack check
 * failure code is that we'll switch threads after the current thread has
 * reached the end of its heap block.  If a thread isn't allocating
 * at all, it won't yield.  Hopefully this won't be a problem in practice.
 */
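
/* For orientation, here is a sketch (not code from this file; the label
 * and the sizes 'n' and 'm' are invented) of the kind of check compiled
 * code performs before any of the canned sequences below is reached.
 * The Hp/HpLim/HpAlloc and Sp/SpLim protocol is what GC_GENERIC relies on:
 *
 *     if (Sp - WDS(m) < SpLim) { goto gc; }       // stack check: Hp untouched
 *     Hp = Hp + n;
 *     if (Hp > HpLim) { HpAlloc = n; goto gc; }   // heap check
 *     ...fast path...
 *   gc:
 *     jump stg_gc_noregs;   // or a canned variant that saves live regs
 *
 * GC_GENERIC distinguishes the two failures by re-testing Hp > HpLim.
 */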
#define PRE_RETURN(why,what_next)                       \
  StgTSO_what_next(CurrentTSO) = what_next::I16;        \
  StgRegTable_rRet(BaseReg) = why;                      \
  R1 = BaseReg;
/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */
#define GC_GENERIC                                              \
    DEBUG_ONLY(foreign "C" heapCheckFail());                    \
    if (Hp > HpLim) {                                           \
        Hp = Hp - HpAlloc/*in bytes*/;                          \
        if (HpAlloc <= BLOCK_SIZE                               \
            && bdescr_link(CurrentNursery) != NULL) {           \
            CLOSE_NURSERY();                                    \
            CurrentNursery = bdescr_link(CurrentNursery);       \
            OPEN_NURSERY();                                     \
            if (CInt[context_switch] != 0 :: CInt) {            \
                R1 = ThreadYielding;                            \
                goto sched;                                     \
            } else {                                            \
                jump %ENTRY_CODE(Sp(0));                        \
            }                                                   \
        } else { R1 = HeapOverflow; goto sched; }               \
    } else { R1 = StackOverflow; }                              \
  sched:                                                        \
    PRE_RETURN(R1,ThreadRunGHC);                                \
    jump stg_returnToSched;
#define HP_GENERIC                              \
    PRE_RETURN(HeapOverflow, ThreadRunGHC)      \
    jump stg_returnToSched;
#define BLOCK_GENERIC                           \
    PRE_RETURN(ThreadBlocked, ThreadRunGHC)     \
    jump stg_returnToSched;

#define YIELD_GENERIC                           \
    PRE_RETURN(ThreadYielding, ThreadRunGHC)    \
    jump stg_returnToSched;
#define BLOCK_BUT_FIRST(c)                      \
    PRE_RETURN(ThreadBlocked, ThreadRunGHC)     \
    R2 = c;                                     \
    jump stg_returnToSchedButFirst;
#define YIELD_TO_INTERPRETER                    \
    PRE_RETURN(ThreadYielding, ThreadInterpret) \
    jump stg_returnToSchedNotPaused;
/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */
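
/* A sketch of how compiled code reaches these sequences (illustrative,
 * not from this file): on heap-check failure in a closure's entry code
 * the closure pointer is still in R1, so the failure branch is simply
 *
 *     jump __stg_gc_enter_1;
 *
 * __stg_gc_enter_1 below parks R1 under a stg_enter frame, runs the
 * generic check-failure code, and the scheduler resumes the thread by
 * entering the closure again.
 */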
INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    ENTER();
}

__stg_gc_enter_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    GC_GENERIC
}
#if defined(GRAN)
/*
  ToDo: merge the block and yield macros, calling something like BLOCK(N)
        at the end.
*/

/*
   Should we actually ever do a yield in such a case?? -- HWL
*/

/* Canned yield sequences for 0..8 live pointer registers: gran_yield_N
   pushes R1..RN, saves the thread state, and returns to the scheduler
   claiming ThreadYielding.  gran_yield_1 is representative; the others
   differ only in the number of registers pushed.
*/

gran_yield_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/* gran_yield_2 .. gran_yield_8 follow the same pattern. */

// the same routines but with a block rather than a yield:
// gran_block_N pushes its registers exactly as gran_yield_N does but
// returns ThreadBlocked instead.

gran_block_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/* gran_block_2 .. gran_block_8 follow the same pattern. */

#endif /* GRAN */
#if 0 && defined(PAR)

/*
  Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
  saving of the thread state from the actual jump via an StgReturn.
  We need this separation because we call RTS routines in blocking entry
  codes before jumping back into the RTS (see parallel/FetchMe.hc).
*/

par_block_1_no_jump
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
}

par_jump
{
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif
/* -----------------------------------------------------------------------------
   Heap checks in Primitive case alternatives

   A primitive case alternative is entered with a value either in
   R1, FloatReg1 or D1 depending on the return convention.  All the
   cases are covered below.
   -------------------------------------------------------------------------- */
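
/* A sketch of a call site (invented, for illustration): a case
 * alternative that scrutinises an Int# and then allocates may fail its
 * heap check while the unboxed result is still live in R1, so its
 * failure branch jumps to the matching canned sequence:
 *
 *     HpAlloc = WDS(2);
 *     jump stg_gc_unbx_r1;   // R1 is a non-pointer: 1-word bitmap frame
 *
 * The float/double/int64 variants below differ only in which return
 * register they save and in the size of its stack slot.
 */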
/*-- No Registers live ------------------------------------------------------ */

stg_gc_noregs
{
    GC_GENERIC
}

/*-- void return ------------------------------------------------------------ */

INFO_TABLE_RET( stg_gc_void, RET_SMALL)
{
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}
/*-- R1 is boxed/unpointed -------------------------------------------------- */
INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unpt_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unpt_r1_info;
    GC_GENERIC
}
/*-- R1 is unboxed ----------------------------------------------------------- */

/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unbx_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unbx_r1_info;
    GC_GENERIC
}
/*-- F1 contains a float ------------------------------------------------- */
INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
{
    F1 = F_[Sp + WDS(1)];
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_f1
{
    Sp_adj(-2);
    F_[Sp + WDS(1)] = F1;
    Sp(0) = stg_gc_f1_info;
    GC_GENERIC
}
/*-- D1 contains a double ------------------------------------------------- */
INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
{
    D1 = D_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgDouble;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_d1
{
    Sp = Sp - WDS(1) - SIZEOF_StgDouble;
    D_[Sp + WDS(1)] = D1;
    Sp(0) = stg_gc_d1_info;
    GC_GENERIC
}
/*-- L1 contains an int64 ------------------------------------------------- */
INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
{
    L1 = L_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgWord64;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_l1
{
    Sp = Sp - WDS(1) - SIZEOF_StgWord64;
    L_[Sp + WDS(1)] = L1;
    Sp(0) = stg_gc_l1_info;
    GC_GENERIC
}
/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused )
{
    // one ptr is on the stack (Sp(0))
    jump %ENTRY_CODE(Sp(1));
}
/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be
   a pointer to the function closure in R1 - if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.
   This code arranges the stack like this:

         |        ....        |
         +---------------------+
         |          f          |
         +---------------------+
         |        size         |
         +---------------------+
         |   stg_gc_fun_info   |
         +---------------------+
   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */
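
/* Example (illustrative only): for a function f that failed its heap
 * check with two of its arguments already on the stack, the frame built
 * below would be
 *
 *     Sp(0) = stg_gc_fun_info
 *     Sp(1) = 2                  // size: words of args on the stack
 *     Sp(2) = f                  // the function closure, from R1
 *     Sp(3), Sp(4)               // the two arguments, left in place
 *
 * so a stack walker can skip the frame using the cached size alone.
 */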
__stg_gc_fun
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
        size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else {
        if (type == ARG_GEN_BIG) {
#ifdef TABLES_NEXT_TO_CODE
            // bitmap field holds an offset
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
                                        + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
        } else {
            size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
        }
    }

#ifdef NO_ARG_REGS
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    GC_GENERIC
#else
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        GC_GENERIC
    } else {
        jump W_[stg_stack_save_entries + WDS(type)];
        // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}
/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to stg_fun_gc_gen (above): this fragment returns to the
   function, passing arguments on the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */
INFO_TABLE_RET( stg_gc_fun, RET_FUN )
{
    R1 = Sp(2);
    Sp_adj(3);
#ifdef NO_ARG_REGS
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(UNTAG(R1));
#else
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        jump StgFunInfoExtra_slow_apply(info);
    } else {
        if (type == ARG_BCO) {
            // cover this case just to be on the safe side
            Sp_adj(-2);
            Sp(1) = R1;
            Sp(0) = stg_apply_interp_info;
            jump stg_yield_to_interpreter;
        } else {
            jump W_[stg_ap_stack_entries + WDS(type)];
        }
    }
#endif
}
/* -----------------------------------------------------------------------------
   Generic Heap Check Code.

   Called with Liveness mask in R9, Return address in R10.
   Stack must be consistent (containing all necessary info pointers
   to relevant SRTs).

   See StgMacros.h for a description of the RET_DYN stack frame.

   We also define a stg_gen_yield here, because it's very similar.
   -------------------------------------------------------------------------- */
// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P,
// so on a 64-bit machine we'll end up wasting a couple of words, but
// it's not a big deal.
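
/* The RET_DYN frame built by SAVE_EVERYTHING below is 21 words:
 *
 *     Sp(0)           stg_gc_gen_info
 *     Sp(1)           R9  (liveness mask)
 *     Sp(2)           R10 (return address)
 *     Sp(3)..Sp(10)   R1..R8
 *     Sp(11)..Sp(14)  F1..F4
 *     Sp(15), Sp(17)  D1, D2   (two words each)
 *     Sp(19)          L1       (two words)
 */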
#define RESTORE_EVERYTHING                      \
    L1 = L_[Sp + WDS(19)];                      \
    D2 = D_[Sp + WDS(17)];                      \
    D1 = D_[Sp + WDS(15)];                      \
    F4 = F_[Sp + WDS(14)];                      \
    F3 = F_[Sp + WDS(13)];                      \
    F2 = F_[Sp + WDS(12)];                      \
    F1 = F_[Sp + WDS(11)];                      \
    R8 = Sp(10);                                \
    R7 = Sp(9);                                 \
    R6 = Sp(8);                                 \
    R5 = Sp(7);                                 \
    R4 = Sp(6);                                 \
    R3 = Sp(5);                                 \
    R2 = Sp(4);                                 \
    R1 = Sp(3);                                 \
    Sp_adj(21);
#define RET_OFFSET (-19)
#define SAVE_EVERYTHING                         \
    Sp_adj(-21);                                \
    L_[Sp + WDS(19)] = L1;                      \
    D_[Sp + WDS(17)] = D2;                      \
    D_[Sp + WDS(15)] = D1;                      \
    F_[Sp + WDS(14)] = F4;                      \
    F_[Sp + WDS(13)] = F3;                      \
    F_[Sp + WDS(12)] = F2;                      \
    F_[Sp + WDS(11)] = F1;                      \
    Sp(10) = R8;                                \
    Sp(9) = R7;                                 \
    Sp(8) = R6;                                 \
    Sp(7) = R5;                                 \
    Sp(6) = R4;                                 \
    Sp(5) = R3;                                 \
    Sp(4) = R2;                                 \
    Sp(3) = R1;                                 \
    Sp(2) = R10;    /* return address */        \
    Sp(1) = R9;     /* liveness mask  */        \
    Sp(0) = stg_gc_gen_info;
INFO_TABLE_RET( stg_gc_gen, RET_DYN )
/* bitmap in the above info table is unused, the real one is on the stack. */
{
    RESTORE_EVERYTHING;
    jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
}

stg_gc_gen
{
    SAVE_EVERYTHING;
    GC_GENERIC
}
// A heap check at an unboxed tuple return point.  The return address
// is on the stack, and we can find it by using the offsets given
// to us in the liveness mask.
stg_gc_ut
{
    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
    jump stg_gc_gen;
}
/*
 * stg_gc_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
 * because we've just failed doYouWantToGC(), not a standard heap
 * check.  GC_GENERIC would end up returning StackOverflow.
 */
stg_gc_gen_hp
{
    SAVE_EVERYTHING;
    HP_GENERIC
}
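
/* For reference, a sketch of the call side (simplified; the real
 * MAYBE_GC macro lives in Cmm.h, and 'myPrimzh_fast' is a made-up
 * primop name): an allocating primop typically begins
 *
 *     MAYBE_GC(R1_PTR, myPrimzh_fast);
 *
 * which, when a GC is wanted, sets R9 to the liveness mask and R10 to
 * the re-entry point before reaching stg_gc_gen_hp above, so the primop
 * is simply restarted after the collection.
 */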
/* -----------------------------------------------------------------------------
   Yields.
   -------------------------------------------------------------------------- */

stg_yield_noregs
{
    YIELD_GENERIC;
}
/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}
/* -----------------------------------------------------------------------------
   Blocks.
   -------------------------------------------------------------------------- */

stg_block_noregs
{
    BLOCK_GENERIC;
}

stg_block_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}
/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar:
 *
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info
 *
 * Stack layout for a thread blocked in putMVar:
 *
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.hc for a description of the workings of take/putMVar.
 *
 * -------------------------------------------------------------------------- */
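
/* A sketch of the blocking path (simplified; see takeMVarzh_fast in the
 * PrimOps file for the real code): on finding the MVar empty, the primop
 * enqueues the current TSO and jumps here to build the frame shown above:
 *
 *     // MVar empty: block the thread
 *     StgTSO_block_info(CurrentTSO) = mvar;
 *     ...enqueue CurrentTSO on the MVar's waiting queue...
 *     jump stg_block_takemvar;    // R1 = mvar
 *
 * On wake-up, the frame re-enters takeMVarzh_fast to retry the take.
 */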
INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump takeMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_takemvar
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}
INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump putMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_putmvar
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_putmvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
}
// code fragment executed just before we return to the scheduler
stg_block_blackhole_finally
{
#if defined(THREADED_RTS)
    // The last thing we do is release sched_lock, which is
    // preventing other threads from accessing blackhole_queue and
    // picking up this thread before we are finished with it.
    RELEASE_LOCK(sched_mutex "ptr");
#endif
    jump StgReturn;
}

stg_block_blackhole
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_BUT_FIRST(stg_block_blackhole_finally);
}
INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump killThreadzh_fast;
}

stg_block_throwto_finally
{
#ifdef THREADED_RTS
    foreign "C" throwToReleaseTarget (R3 "ptr");
#endif
    jump StgReturn;
}

stg_block_throwto
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_throwto_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_throwto_finally);
}
#ifdef mingw32_HOST_OS
INFO_TABLE_RET( stg_block_async, RET_SMALL )
{
    W_ ares;
    W_ len, errC;

    ares = StgTSO_block_info(CurrentTSO);
    len = StgAsyncIOResult_len(ares);
    errC = StgAsyncIOResult_errCode(ares);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    R1 = len;
    Sp(0) = errC;
    jump %ENTRY_CODE(Sp(1));
}

stg_block_async
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}
/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
{
    W_ ares;

    ares = StgTSO_block_info(CurrentTSO);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

stg_block_async_void
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif
/* -----------------------------------------------------------------------------
   STM-specific waiting
   -------------------------------------------------------------------------- */

stg_block_stmwait_finally
{
    foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
    jump StgReturn;
}

stg_block_stmwait
{
    BLOCK_BUT_FIRST(stg_block_stmwait_finally);
}