/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
import pthread_mutex_unlock;
import EnterCriticalSection;
import LeaveCriticalSection;
/* Stack/Heap Check Failure
 * ------------------------
 *
 * On discovering that a stack or heap check has failed, we do the following:
 *
 *    - If HpLim==0, indicating that we should context-switch, we yield
 *      to the scheduler (return ThreadYielding).
 *
 *    - If the context_switch flag is set (the backup plan if setting HpLim
 *      to 0 didn't trigger a context switch), we yield to the scheduler
 *      (return ThreadYielding).
 *
 *    - If Hp > HpLim, we've had a heap check failure.  This means we've
 *      come to the end of the current heap block, so we try to chain
 *      another block on with ExtendNursery().
 *
 *          - If this succeeds, we carry on without returning to the
 *            scheduler.
 *
 *          - If it fails, we return to the scheduler claiming HeapOverflow
 *            so that a garbage collection can be performed.
 *
 *    - If Hp <= HpLim, it must have been a stack check that failed.  In
 *      which case, we return to the scheduler claiming StackOverflow, and
 *      the scheduler will either increase the size of our stack, or raise
 *      an exception if the stack is already too big.
 *
 * The effect of checking for context switch only in the heap/stack check
 * failure code is that we'll switch threads after the current thread has
 * reached the end of its heap block.  If a thread isn't allocating
 * at all, it won't yield.  Hopefully this won't be a problem in practice.
 */
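
/* As an illustration (not part of this file; the names and the allocation
 * size are made up for the example), compiled code reaches the canned
 * sequences below via a heap check of roughly this shape:
 *
 *    f_entry()
 *    {
 *        Hp = Hp + WDS(3);            // optimistically claim three words
 *        if (Hp > HpLim) {
 *            HpAlloc = WDS(3);        // record the size of the failed claim
 *            jump __stg_gc_enter_1;   // R1 points to the current closure
 *        }
 *        ... fill in the three words ...
 *    }
 *
 * GC_GENERIC below relies on HpAlloc being set: it undoes the optimistic
 * "Hp = Hp + ..." by subtracting HpAlloc before deciding what to do.
 */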
#define PRE_RETURN(why,what_next)                       \
  StgTSO_what_next(CurrentTSO) = what_next::I16;        \
  StgRegTable_rRet(BaseReg) = why;                      \
  R1 = BaseReg;

/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */
#define GC_GENERIC                                                      \
    DEBUG_ONLY(foreign "C" heapCheckFail());                            \
    if (Hp > HpLim) {                                                   \
        Hp = Hp - HpAlloc/*in bytes*/;                                  \
        if (HpLim == 0) { /* context-switch requested */                \
            R1 = ThreadYielding; goto sched;                            \
        }                                                               \
        if (HpAlloc <= BLOCK_SIZE                                       \
            && bdescr_link(CurrentNursery) != NULL) {                   \
            CLOSE_NURSERY();                                            \
            CurrentNursery = bdescr_link(CurrentNursery);               \
            OPEN_NURSERY();                                             \
            if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \
                R1 = ThreadYielding; goto sched;                        \
            } else {                                                    \
                jump %ENTRY_CODE(Sp(0));                                \
            }                                                           \
        } else { R1 = HeapOverflow; goto sched; }                       \
    } else {                                                            \
        R1 = StackOverflow;                                             \
    }                                                                   \
  sched:                                                                \
    PRE_RETURN(R1,ThreadRunGHC);                                        \
    jump stg_returnToSched;
#define HP_GENERIC                              \
  PRE_RETURN(HeapOverflow, ThreadRunGHC)        \
  jump stg_returnToSched;
#define BLOCK_GENERIC                           \
  PRE_RETURN(ThreadBlocked, ThreadRunGHC)       \
  jump stg_returnToSched;

#define YIELD_GENERIC                           \
  PRE_RETURN(ThreadYielding, ThreadRunGHC)      \
  jump stg_returnToSched;
#define BLOCK_BUT_FIRST(c)                      \
  PRE_RETURN(ThreadBlocked, ThreadRunGHC)       \
  R2 = c;                                       \
  jump stg_returnToSchedButFirst;
#define YIELD_TO_INTERPRETER                    \
  PRE_RETURN(ThreadYielding, ThreadInterpret)   \
  jump stg_returnToSchedNotPaused;
/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */
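
/* Illustrative picture (an assumption based on the code below, not a
 * normative layout): __stg_gc_enter_1 pushes a two-word frame
 *
 *            +---------------------+
 *            |  R1 (the closure)   |
 *            +---------------------+
 *            |   stg_enter_info    |  <- Sp
 *            +---------------------+
 *
 * so that when the scheduler resumes the thread, it returns through
 * stg_enter_info, which pops the frame and enters the closure in R1.
 */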
INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    ENTER();
}

__stg_gc_enter_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    GC_GENERIC
}
#if defined(GRAN)
/*
  ToDo: merge the block and yield macros, calling something like BLOCK(N)
        at the end;
*/

/*
   Should we actually ever do a yield in such a case?? -- HWL
*/
TSO_what_next(CurrentTSO) = ThreadRunGHC;

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 2 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 3 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 4 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 5 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 6 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 7 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 8 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

// the same routines but with a block rather than a yield

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 2 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 3 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 4 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 5 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 6 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 7 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 8 Regs -------------------------------------------------------------------*/

TSO_what_next(CurrentTSO) = ThreadRunGHC;
#endif /* GRAN */

#if 0 && defined(PAR)

/*
  Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
  saving of the thread state from the actual jump via an StgReturn.
  We need this separation because we call RTS routines in blocking entry codes
  before jumping back into the RTS (see parallel/FetchMe.hc).
*/

par_block_1_no_jump
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    SAVE_THREAD_STATE();
}

par_jump
{
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif
/* -----------------------------------------------------------------------------
   Heap checks in Primitive case alternatives

   A primitive case alternative is entered with a value either in
   R1, FloatReg1 or D1 depending on the return convention.  All the
   cases are covered below.
   -------------------------------------------------------------------------- */
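
/* For example (a hypothetical fragment, not generated from this file), a
 * case alternative that receives an unboxed Int# in R1 fails its heap
 * check into stg_gc_unbx_r1, which saves R1 as a non-pointer word:
 *
 *    alt_ret()
 *    {
 *        Hp = Hp + WDS(2);
 *        if (Hp > HpLim) {
 *            HpAlloc = WDS(2);
 *            jump stg_gc_unbx_r1;    // R1 is unboxed: saved under a
 *        }                           // one-word non-pointer bitmap
 *        ...
 *    }
 *
 * A boxed result would use stg_gc_unpt_r1 instead, a float in F1 would
 * use stg_gc_f1, and so on for D1 and L1.
 */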
/*-- No Registers live ------------------------------------------------------ */

stg_gc_noregs
{
    GC_GENERIC
}

/*-- void return ------------------------------------------------------------ */

INFO_TABLE_RET( stg_gc_void, RET_SMALL)
{
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}
/*-- R1 is boxed/unpointed -------------------------------------------------- */

INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unpt_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unpt_r1_info;
    GC_GENERIC
}
/*-- R1 is unboxed ----------------------------------------------------------- */

/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unbx_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unbx_r1_info;
    GC_GENERIC
}
/*-- F1 contains a float ----------------------------------------------------- */

INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
{
    F1 = F_[Sp + WDS(1)];
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_f1
{
    Sp_adj(-2);
    F_[Sp + WDS(1)] = F1;
    Sp(0) = stg_gc_f1_info;
    GC_GENERIC
}
/*-- D1 contains a double ---------------------------------------------------- */

INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
{
    D1 = D_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgDouble;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_d1
{
    Sp = Sp - WDS(1) - SIZEOF_StgDouble;
    D_[Sp + WDS(1)] = D1;
    Sp(0) = stg_gc_d1_info;
    GC_GENERIC
}
/*-- L1 contains an int64 ---------------------------------------------------- */

INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
{
    L1 = L_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgWord64;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_l1
{
    Sp = Sp - WDS(1) - SIZEOF_StgWord64;
    L_[Sp + WDS(1)] = L1;
    Sp(0) = stg_gc_l1_info;
    GC_GENERIC
}
/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */

INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused )
{
    Sp_adj(1);
    // one ptr is on the stack (Sp(0))
    jump %ENTRY_CODE(Sp(1));
}
/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be
   a pointer to the function closure in R1 - if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.

   This code arranges the stack like this:

         |        ....         |
         +---------------------+
         |         f           |
         +---------------------+
         |        size         |
         +---------------------+
         |   stg_gc_fun_info   |
         +---------------------+

   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */
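
/* A concrete (made-up) instance: for a function f with two words of
 * arguments already on the stack, __stg_gc_fun below builds
 *
 *        Sp(4), Sp(3):  the two argument words
 *        Sp(2):         f  (the closure pointer from R1)
 *        Sp(1):         size = 2
 *        Sp(0):         stg_gc_fun_info
 *
 * so a stack walker can skip the arguments using only Sp(1), without
 * decoding f's info table.
 */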
__stg_gc_fun
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
        size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else {
        if (type == ARG_GEN_BIG) {
#ifdef TABLES_NEXT_TO_CODE
            // bitmap field holds an offset
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
                                        + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
        } else {
            size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
        }
    }

#ifdef NO_ARG_REGS
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    GC_GENERIC
#else
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        GC_GENERIC
    } else {
        jump W_[stg_stack_save_entries + WDS(type)];
        // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}
/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to stg_fun_gc_gen (above): this fragment returns to the
   function, passing arguments on the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */
INFO_TABLE_RET( stg_gc_fun, RET_FUN )
{
    R1 = Sp(2);
    Sp_adj(3);
#ifdef NO_ARG_REGS
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(UNTAG(R1));
#else
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        jump StgFunInfoExtra_slow_apply(info);
    } else {
        if (type == ARG_BCO) {
            // cover this case just to be on the safe side
            Sp_adj(-2);
            Sp(1) = R1;
            Sp(0) = stg_apply_interp_info;
            jump stg_yield_to_interpreter;
        } else {
            jump W_[stg_ap_stack_entries + WDS(type)];
        }
    }
#endif
}
/* -----------------------------------------------------------------------------
   Generic Heap Check Code.

   Called with Liveness mask in R9, Return address in R10.
   Stack must be consistent (containing all necessary info pointers
   to relevant SRTs).

   See StgMacros.h for a description of the RET_DYN stack frame.

   We also define an stg_gen_yield here, because it's very similar.
   -------------------------------------------------------------------------- */
// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P;
// on a 64-bit machine we'll end up wasting a couple of words, but
// it's not a big deal.
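
// For reference, the frame built by SAVE_EVERYTHING below (word offsets
// from Sp; D1/D2 and L1 each occupy two words under the assumption above):
//
//    Sp(0)          stg_gc_gen_info
//    Sp(1)          R9  (liveness mask)
//    Sp(2)          R10 (return address)
//    Sp(3)..(10)    R1..R8
//    Sp(11)..(14)   F1..F4
//    Sp(15), Sp(17) D1, D2
//    Sp(19)         L1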
#define RESTORE_EVERYTHING                      \
    L1 = L_[Sp + WDS(19)];                      \
    D2 = D_[Sp + WDS(17)];                      \
    D1 = D_[Sp + WDS(15)];                      \
    F4 = F_[Sp + WDS(14)];                      \
    F3 = F_[Sp + WDS(13)];                      \
    F2 = F_[Sp + WDS(12)];                      \
    F1 = F_[Sp + WDS(11)];                      \
    R8 = Sp(10); R7 = Sp(9);                    \
    R6 = Sp(8);  R5 = Sp(7);                    \
    R4 = Sp(6);  R3 = Sp(5);                    \
    R2 = Sp(4);  R1 = Sp(3);                    \
    Sp_adj(21);

#define RET_OFFSET (-19)
#define SAVE_EVERYTHING                         \
    Sp_adj(-21);                                \
    L_[Sp + WDS(19)] = L1;                      \
    D_[Sp + WDS(17)] = D2;                      \
    D_[Sp + WDS(15)] = D1;                      \
    F_[Sp + WDS(14)] = F4;                      \
    F_[Sp + WDS(13)] = F3;                      \
    F_[Sp + WDS(12)] = F2;                      \
    F_[Sp + WDS(11)] = F1;                      \
    Sp(10) = R8; Sp(9) = R7;                    \
    Sp(8) = R6;  Sp(7) = R5;                    \
    Sp(6) = R4;  Sp(5) = R3;                    \
    Sp(4) = R2;  Sp(3) = R1;                    \
    Sp(2) = R10;    /* return address */        \
    Sp(1) = R9;     /* liveness mask */         \
    Sp(0) = stg_gc_gen_info;
INFO_TABLE_RET( stg_gc_gen, RET_DYN )
/* bitmap in the above info table is unused, the real one is on the stack. */
{
    RESTORE_EVERYTHING;
    jump Sp(RET_OFFSET); /* No %ENTRY_CODE() - this is an actual code ptr */
}

stg_gc_gen
{
    SAVE_EVERYTHING;
    GC_GENERIC
}

// A heap check at an unboxed tuple return point.  The return address
// is on the stack, and we can find it by using the offsets given
// to us in the liveness mask.
stg_gc_ut
{
    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
    SAVE_EVERYTHING;
    GC_GENERIC
}
/*
 * stg_gc_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
 * because we've just failed doYouWantToGC(), not a standard heap
 * check.  GC_GENERIC would end up returning StackOverflow.
 */
stg_gc_gen_hp
{
    SAVE_EVERYTHING;
    HP_GENERIC
}
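
/* Illustrative use (assumed; see the MAYBE_GC macro and PrimOps for the
 * real pattern): a primop that may need to GC before allocating starts
 * with something like
 *
 *    foozh_fast                          // hypothetical primop
 *    {
 *        MAYBE_GC(R1_PTR, foozh_fast);   // liveness: R1 is a pointer;
 *        ...                             // re-entry point after GC
 *    }
 *
 * MAYBE_GC puts the liveness mask in R9 and the re-entry address in R10
 * (the convention described above), then jumps to stg_gc_gen_hp.
 */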
/* -----------------------------------------------------------------------------
   Yields and Blocks
   -------------------------------------------------------------------------- */

stg_gen_yield
{
    SAVE_EVERYTHING;
    YIELD_GENERIC
}

stg_yield_noregs
{
    YIELD_GENERIC;
}
/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}
/* -----------------------------------------------------------------------------
   Blocks
   -------------------------------------------------------------------------- */

stg_block_noregs
{
    BLOCK_GENERIC;
}

stg_block_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}
/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar:
 *
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info
 *
 * Stack layout for a thread blocked in putMVar:
 *
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.hc for a description of the workings of take/putMVar.
 *
 * -------------------------------------------------------------------------- */
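
/* Sketch (assumed control flow; see PrimOps for the real code): when
 * takeMVarzh_fast finds the MVar empty, it enqueues the current TSO and
 * jumps here with the MVar in R1:
 *
 *    ... enqueue CurrentTSO on mvar's blocked queue ...
 *    R1 = mvar;
 *    jump stg_block_takemvar;   // builds the frame above, then blocks
 *
 * When another thread fills the MVar, this thread resumes at
 * stg_block_takemvar_info, which pops the frame and retries
 * takeMVarzh_fast from the beginning.
 */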
INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump takeMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_takemvar
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1; // the MVar, passed to stg_block_takemvar_finally
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}
INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump putMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_putmvar
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_putmvar_info;
    R3 = R1; // the MVar, passed to stg_block_putmvar_finally
    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
}
// code fragment executed just before we return to the scheduler
stg_block_blackhole_finally
{
#if defined(THREADED_RTS)
    // The last thing we do is release sched_lock, which is
    // preventing other threads from accessing blackhole_queue and
    // picking up this thread before we are finished with it.
    RELEASE_LOCK(sched_mutex "ptr");
#endif
    jump StgReturn;
}

stg_block_blackhole
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_BUT_FIRST(stg_block_blackhole_finally);
}
INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump killThreadzh_fast;
}

stg_block_throwto_finally
{
#ifdef THREADED_RTS
    foreign "C" throwToReleaseTarget (R3 "ptr");
#endif
    jump StgReturn;
}

stg_block_throwto
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_throwto_info;
    R3 = R1; // the target TSO, passed to stg_block_throwto_finally
    BLOCK_BUT_FIRST(stg_block_throwto_finally);
}
#ifdef mingw32_HOST_OS
INFO_TABLE_RET( stg_block_async, RET_SMALL )
{
    W_ ares;
    W_ len, errC;

    ares = StgTSO_block_info(CurrentTSO);
    len = StgAsyncIOResult_len(ares);
    errC = StgAsyncIOResult_errCode(ares);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    R1 = len;
    Sp(0) = errC;
    jump %ENTRY_CODE(Sp(1));
}

stg_block_async
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}
/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
{
    W_ ares;

    ares = StgTSO_block_info(CurrentTSO);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

stg_block_async_void
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif
/* -----------------------------------------------------------------------------
   STM-specific waiting
   -------------------------------------------------------------------------- */

stg_block_stmwait_finally
{
    foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
    jump StgReturn;
}

stg_block_stmwait
{
    BLOCK_BUT_FIRST(stg_block_stmwait_finally);
}