1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
16 import pthread_mutex_unlock;
19 /* Stack/Heap Check Failure
20 * ------------------------
22 * On discovering that a stack or heap check has failed, we do the following:
24 * - If the context_switch flag is set, indicating that there are more
25 * threads waiting to run, we yield to the scheduler
26 * (return ThreadYielding).
28 * - If Hp > HpLim, we've had a heap check failure. This means we've
29 * come to the end of the current heap block, so we try to chain
30 * another block on with ExtendNursery().
32 * - If this succeeds, we carry on without returning to the
35 * - If it fails, we return to the scheduler claiming HeapOverflow
36 * so that a garbage collection can be performed.
38 * - If Hp <= HpLim, it must have been a stack check that failed. In
39 * which case, we return to the scheduler claiming StackOverflow, the
40 * scheduler will either increase the size of our stack, or raise
41 * an exception if the stack is already too big.
43 * The effect of checking for context switch only in the heap/stack check
44 * failure code is that we'll switch threads after the current thread has
45 * reached the end of its heap block. If a thread isn't allocating
46 * at all, it won't yield. Hopefully this won't be a problem in practice.
49 #define PRE_RETURN(why,what_next) \
50 StgTSO_what_next(CurrentTSO) = what_next::I16; \
51 StgRegTable_rRet(BaseReg) = why; \
54 /* Remember that the return address is *removed* when returning to a
55 * ThreadRunGHC thread.
59 DEBUG_ONLY(foreign "C" heapCheckFail()); \
61 Hp = Hp - HpAlloc/*in bytes*/; \
62 if (HpAlloc <= BLOCK_SIZE \
63 && bdescr_link(CurrentNursery) != NULL) { \
65 CurrentNursery = bdescr_link(CurrentNursery); \
67 if (CInt[context_switch] != 0 :: CInt) { \
68 R1 = ThreadYielding; \
71 jump %ENTRY_CODE(Sp(0)); \
81 PRE_RETURN(R1,ThreadRunGHC); \
82 jump stg_returnToSched;
85 PRE_RETURN(HeapOverflow, ThreadRunGHC) \
86 jump stg_returnToSched;
/* Return to the scheduler reporting that the current thread is blocked:
 * PRE_RETURN stores ThreadBlocked into StgRegTable_rRet(BaseReg) and
 * ThreadRunGHC into CurrentTSO->what_next, then we jump to the common
 * return point stg_returnToSched.
 */
88 #define BLOCK_GENERIC \
89 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
90 jump stg_returnToSched;
/* Return to the scheduler reporting that the current thread is merely
 * yielding its timeslice: PRE_RETURN stores ThreadYielding into
 * StgRegTable_rRet(BaseReg) and ThreadRunGHC into CurrentTSO->what_next,
 * then we jump to stg_returnToSched.
 */
92 #define YIELD_GENERIC \
93 PRE_RETURN(ThreadYielding, ThreadRunGHC) \
94 jump stg_returnToSched;
96 #define BLOCK_BUT_FIRST(c) \
97 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
99 jump stg_returnToSchedButFirst;
/* Yield with what_next = ThreadInterpret, so the scheduler resumes this
 * thread in the byte-code interpreter rather than in compiled code.
 * Jumps to stg_returnToSchedNotPaused instead of stg_returnToSched;
 * NOTE(review): presumably the NotPaused variant skips the "thread
 * paused" bookkeeping done on the normal return path -- confirm against
 * the definition of stg_returnToSchedNotPaused in the RTS.
 */
101 #define YIELD_TO_INTERPRETER \
102 PRE_RETURN(ThreadYielding, ThreadInterpret) \
103 jump stg_returnToSchedNotPaused;
105 /* -----------------------------------------------------------------------------
106 Heap checks in thunks/functions.
108 In these cases, node always points to the function closure. This gives
109 us an easy way to return to the function: just leave R1 on the top of
110 the stack, and have the scheduler enter it to return.
112 There are canned sequences for 'n' pointer values in registers.
113 -------------------------------------------------------------------------- */
115 INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused)
126 Sp(0) = stg_enter_info;
132 ToDo: merge the block and yield macros, calling something like BLOCK(N)
137 Should we actually ever do a yield in such a case?? -- HWL
142 TSO_what_next(CurrentTSO) = ThreadRunGHC;
152 TSO_what_next(CurrentTSO) = ThreadRunGHC;
157 /*- 2 Regs -------------------------------------------------------------------*/
165 TSO_what_next(CurrentTSO) = ThreadRunGHC;
170 /*- 3 Regs -------------------------------------------------------------------*/
179 TSO_what_next(CurrentTSO) = ThreadRunGHC;
184 /*- 4 Regs -------------------------------------------------------------------*/
194 TSO_what_next(CurrentTSO) = ThreadRunGHC;
199 /*- 5 Regs -------------------------------------------------------------------*/
210 TSO_what_next(CurrentTSO) = ThreadRunGHC;
215 /*- 6 Regs -------------------------------------------------------------------*/
227 TSO_what_next(CurrentTSO) = ThreadRunGHC;
232 /*- 7 Regs -------------------------------------------------------------------*/
245 TSO_what_next(CurrentTSO) = ThreadRunGHC;
250 /*- 8 Regs -------------------------------------------------------------------*/
264 TSO_what_next(CurrentTSO) = ThreadRunGHC;
269 // the same routines but with a block rather than a yield
276 TSO_what_next(CurrentTSO) = ThreadRunGHC;
281 /*- 2 Regs -------------------------------------------------------------------*/
289 TSO_what_next(CurrentTSO) = ThreadRunGHC;
294 /*- 3 Regs -------------------------------------------------------------------*/
303 TSO_what_next(CurrentTSO) = ThreadRunGHC;
308 /*- 4 Regs -------------------------------------------------------------------*/
318 TSO_what_next(CurrentTSO) = ThreadRunGHC;
323 /*- 5 Regs -------------------------------------------------------------------*/
334 TSO_what_next(CurrentTSO) = ThreadRunGHC;
339 /*- 6 Regs -------------------------------------------------------------------*/
351 TSO_what_next(CurrentTSO) = ThreadRunGHC;
356 /*- 7 Regs -------------------------------------------------------------------*/
369 TSO_what_next(CurrentTSO) = ThreadRunGHC;
374 /*- 8 Regs -------------------------------------------------------------------*/
388 TSO_what_next(CurrentTSO) = ThreadRunGHC;
395 #if 0 && defined(PAR)
398 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
399 saving of the thread state from the actual jump via an StgReturn.
400 We need this separation because we call RTS routines in blocking entry codes
401 before jumping back into the RTS (see parallel/FetchMe.hc).
413 TSO_what_next(CurrentTSO) = ThreadRunGHC;
420 /* -----------------------------------------------------------------------------
421 Heap checks in Primitive case alternatives
423 A primitive case alternative is entered with a value either in
424 R1, FloatReg1 or D1 depending on the return convention. All the
425 cases are covered below.
426 -------------------------------------------------------------------------- */
428 /*-- No Registers live ------------------------------------------------------ */
435 /*-- void return ------------------------------------------------------------ */
437 INFO_TABLE_RET( stg_gc_void, RET_SMALL)
440 jump %ENTRY_CODE(Sp(0));
443 /*-- R1 is boxed/unpointed -------------------------------------------------- */
445 INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused)
449 jump %ENTRY_CODE(Sp(0));
456 Sp(0) = stg_gc_unpt_r1_info;
460 /*-- R1 is unboxed -------------------------------------------------- */
462 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
463 INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
467 jump %ENTRY_CODE(Sp(0));
474 Sp(0) = stg_gc_unbx_r1_info;
478 /*-- F1 contains a float ------------------------------------------------- */
480 INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
484 jump %ENTRY_CODE(Sp(0));
490 F_[Sp + WDS(1)] = F1;
491 Sp(0) = stg_gc_f1_info;
495 /*-- D1 contains a double ------------------------------------------------- */
497 INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
499 D1 = D_[Sp + WDS(1)];
500 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
501 jump %ENTRY_CODE(Sp(0));
506 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
507 D_[Sp + WDS(1)] = D1;
508 Sp(0) = stg_gc_d1_info;
513 /*-- L1 contains an int64 ------------------------------------------------- */
515 INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
517 L1 = L_[Sp + WDS(1)];
518 Sp_adj(1) + SIZEOF_StgWord64;
519 jump %ENTRY_CODE(Sp(0));
524 Sp_adj(-1) - SIZEOF_StgWord64;
525 L_[Sp + WDS(1)] = L1;
526 Sp(0) = stg_gc_l1_info;
530 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
532 INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused )
535 // one ptr is on the stack (Sp(0))
536 jump %ENTRY_CODE(Sp(1));
539 /* -----------------------------------------------------------------------------
540 Generic function entry heap check code.
542 At a function entry point, the arguments are as per the calling convention,
543 i.e. some in regs and some on the stack. There may or may not be
544 a pointer to the function closure in R1 - if there isn't, then the heap
545 check failure code in the function will arrange to load it.
547 The function's argument types are described in its info table, so we
548 can just jump to this bit of generic code to save away all the
549 registers and return to the scheduler.
551 This code arranges the stack like this:
555 +---------------------+
557 +---------------------+
559 +---------------------+
561 +---------------------+
563 The size is the number of words of arguments on the stack, and is cached
564 in the frame in order to simplify stack walking: otherwise the size of
565 this stack frame would have to be calculated by looking at f's info table.
567 -------------------------------------------------------------------------- */
575 info = %GET_FUN_INFO(UNTAG(R1));
578 type = TO_W_(StgFunInfoExtra_fun_type(info));
579 if (type == ARG_GEN) {
580 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
582 if (type == ARG_GEN_BIG) {
583 #ifdef TABLES_NEXT_TO_CODE
584 // bitmap field holds an offset
585 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
586 + %GET_ENTRY(UNTAG(R1)) /* ### */ );
588 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
591 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
596 // we don't have to save any registers away
600 Sp(0) = stg_gc_fun_info;
604 type = TO_W_(StgFunInfoExtra_fun_type(info));
606 if (type == ARG_GEN || type == ARG_GEN_BIG) {
607 // regs already saved by the heap check code
611 Sp(0) = stg_gc_fun_info;
612 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
615 jump W_[stg_stack_save_entries + WDS(type)];
616 // jumps to stg_gc_noregs after saving stuff
618 #endif /* !NO_ARG_REGS */
621 /* -----------------------------------------------------------------------------
622 Generic Apply (return point)
624 The dual to stg_fun_gc_gen (above): this fragment returns to the
625 function, passing arguments in the stack and in registers
626 appropriately. The stack layout is given above.
627 -------------------------------------------------------------------------- */
629 INFO_TABLE_RET( stg_gc_fun, RET_FUN )
634 // Minor optimisation: there are no argument registers to load up,
635 // so we can just jump straight to the function's entry point.
636 jump %GET_ENTRY(UNTAG(R1));
641 info = %GET_FUN_INFO(UNTAG(R1));
642 type = TO_W_(StgFunInfoExtra_fun_type(info));
643 if (type == ARG_GEN || type == ARG_GEN_BIG) {
644 jump StgFunInfoExtra_slow_apply(info);
646 if (type == ARG_BCO) {
647 // cover this case just to be on the safe side
650 Sp(0) = stg_apply_interp_info;
651 jump stg_yield_to_interpreter;
653 jump W_[stg_ap_stack_entries + WDS(type)];
659 /* -----------------------------------------------------------------------------
660 Generic Heap Check Code.
662 Called with Liveness mask in R9, Return address in R10.
663 Stack must be consistent (containing all necessary info pointers
666 See StgMacros.h for a description of the RET_DYN stack frame.
668 We also define an stg_gen_yield here, because it's very similar.
669 -------------------------------------------------------------------------- */
671 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
672 // on a 64-bit machine, we'll end up wasting a couple of words, but
673 // it's not a big deal.
675 #define RESTORE_EVERYTHING \
676 L1 = L_[Sp + WDS(19)]; \
677 D2 = D_[Sp + WDS(17)]; \
678 D1 = D_[Sp + WDS(15)]; \
679 F4 = F_[Sp + WDS(14)]; \
680 F3 = F_[Sp + WDS(13)]; \
681 F2 = F_[Sp + WDS(12)]; \
682 F1 = F_[Sp + WDS(11)]; \
693 #define RET_OFFSET (-19)
695 #define SAVE_EVERYTHING \
697 L_[Sp + WDS(19)] = L1; \
698 D_[Sp + WDS(17)] = D2; \
699 D_[Sp + WDS(15)] = D1; \
700 F_[Sp + WDS(14)] = F4; \
701 F_[Sp + WDS(13)] = F3; \
702 F_[Sp + WDS(12)] = F2; \
703 F_[Sp + WDS(11)] = F1; \
712 Sp(2) = R10; /* return address */ \
713 Sp(1) = R9; /* liveness mask */ \
714 Sp(0) = stg_gc_gen_info;
716 INFO_TABLE_RET( stg_gc_gen, RET_DYN )
717 /* bitmap in the above info table is unused, the real one is on the stack. */
720 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
729 // A heap check at an unboxed tuple return point. The return address
730 // is on the stack, and we can find it by using the offsets given
731 // to us in the liveness mask.
734 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
740 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
741 * because we've just failed doYouWantToGC(), not a standard heap
742 * check. GC_GENERIC would end up returning StackOverflow.
750 /* -----------------------------------------------------------------------------
752 -------------------------------------------------------------------------- */
765 /* -----------------------------------------------------------------------------
766 Yielding to the interpreter... top of stack says what to do next.
767 -------------------------------------------------------------------------- */
769 stg_yield_to_interpreter
771 YIELD_TO_INTERPRETER;
774 /* -----------------------------------------------------------------------------
776 -------------------------------------------------------------------------- */
793 Sp(0) = stg_enter_info;
797 /* -----------------------------------------------------------------------------
798 * takeMVar/putMVar-specific blocks
800 * Stack layout for a thread blocked in takeMVar:
804 * stg_block_takemvar_info
806 * Stack layout for a thread blocked in putMVar:
811 * stg_block_putmvar_info
813 * See PrimOps.hc for a description of the workings of take/putMVar.
815 * -------------------------------------------------------------------------- */
817 INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused )
821 jump takeMVarzh_fast;
824 // code fragment executed just before we return to the scheduler
825 stg_block_takemvar_finally
828 unlockClosure(R3, stg_EMPTY_MVAR_info);
837 Sp(0) = stg_block_takemvar_info;
839 BLOCK_BUT_FIRST(stg_block_takemvar_finally);
842 INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 )
850 // code fragment executed just before we return to the scheduler
851 stg_block_putmvar_finally
854 unlockClosure(R3, stg_FULL_MVAR_info);
864 Sp(0) = stg_block_putmvar_info;
866 BLOCK_BUT_FIRST(stg_block_putmvar_finally);
869 // code fragment executed just before we return to the scheduler
870 stg_block_blackhole_finally
872 #if defined(THREADED_RTS)
873 // The last thing we do is release sched_lock, which is
874 // preventing other threads from accessing blackhole_queue and
875 // picking up this thread before we are finished with it.
876 foreign "C" RELEASE_LOCK(sched_mutex "ptr");
885 Sp(0) = stg_enter_info;
886 BLOCK_BUT_FIRST(stg_block_blackhole_finally);
889 INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused )
894 jump killThreadzh_fast;
897 stg_block_throwto_finally
900 foreign "C" throwToReleaseTarget (R3 "ptr");
910 Sp(0) = stg_block_throwto_info;
911 BLOCK_BUT_FIRST(stg_block_throwto_finally);
914 #ifdef mingw32_HOST_OS
915 INFO_TABLE_RET( stg_block_async, RET_SMALL )
920 ares = StgTSO_block_info(CurrentTSO);
921 len = StgAsyncIOResult_len(ares);
922 errC = StgAsyncIOResult_errCode(ares);
923 StgTSO_block_info(CurrentTSO) = NULL;
924 foreign "C" free(ares "ptr");
927 jump %ENTRY_CODE(Sp(1));
933 Sp(0) = stg_block_async_info;
937 /* Used by threadDelay implementation; it would be desirable to get rid of
938 * this free()'ing void return continuation.
940 INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
944 ares = StgTSO_block_info(CurrentTSO);
945 StgTSO_block_info(CurrentTSO) = NULL;
946 foreign "C" free(ares "ptr");
948 jump %ENTRY_CODE(Sp(0));
954 Sp(0) = stg_block_async_void_info;
960 /* -----------------------------------------------------------------------------
962 -------------------------------------------------------------------------- */
964 stg_block_stmwait_finally
966 foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
972 BLOCK_BUT_FIRST(stg_block_stmwait_finally);