1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
15 /* Stack/Heap Check Failure
16 * ------------------------
18 * On discovering that a stack or heap check has failed, we do the following:
20 * - If the context_switch flag is set, indicating that there are more
21 * threads waiting to run, we yield to the scheduler
22 * (return ThreadYielding).
24 * - If Hp > HpLim, we've had a heap check failure. This means we've
25 * come to the end of the current heap block, so we try to chain
26 * another block on with ExtendNursery().
28 * - If this succeeds, we carry on without returning to the scheduler.
31 * - If it fails, we return to the scheduler claiming HeapOverflow
32 * so that a garbage collection can be performed.
34 * - If Hp <= HpLim, it must have been a stack check that failed. In
35 * which case, we return to the scheduler claiming StackOverflow, the
36 * scheduler will either increase the size of our stack, or raise
37 * an exception if the stack is already too big.
39 * The effect of checking for context switch only in the heap/stack check
40 * failure code is that we'll switch threads after the current thread has
41 * reached the end of its heap block. If a thread isn't allocating
42 * at all, it won't yield. Hopefully this won't be a problem in practice.
45 #define PRE_RETURN(why,what_next) \
46 StgTSO_what_next(CurrentTSO) = what_next::I16; \
47 StgRegTable_rRet(BaseReg) = why; \
50 /* Remember that the return address is *removed* when returning to a
51 * ThreadRunGHC thread.
55 DEBUG_ONLY(foreign "C" heapCheckFail()); \
57 Hp = Hp - HpAlloc/*in bytes*/; \
58 if (HpAlloc <= BLOCK_SIZE \
59 && bdescr_link(CurrentNursery) != NULL) { \
61 CurrentNursery = bdescr_link(CurrentNursery); \
63 if (CInt[context_switch] != 0 :: CInt) { \
64 R1 = ThreadYielding; \
67 jump %ENTRY_CODE(Sp(0)); \
77 PRE_RETURN(R1,ThreadRunGHC); \
78 jump stg_returnToSched;
81 PRE_RETURN(HeapOverflow, ThreadRunGHC) \
82 jump stg_returnToSched;
/* BLOCK_GENERIC: canned "thread has blocked" return-to-scheduler sequence.
 * Records ThreadBlocked as the reason for returning and ThreadRunGHC as
 * what to do when the thread is resumed (via PRE_RETURN), then jumps to
 * the common stg_returnToSched entry point (defined elsewhere in the RTS),
 * which hands control back to the scheduler.
 */
84 #define BLOCK_GENERIC \
85 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
86 jump stg_returnToSched;
/* YIELD_GENERIC: canned voluntary-yield sequence.  Identical in shape to
 * BLOCK_GENERIC but reports ThreadYielding as the return reason, so the
 * scheduler knows the thread is still runnable and merely giving up its
 * time slice (e.g. when the context_switch flag was found set).
 */
88 #define YIELD_GENERIC \
89 PRE_RETURN(ThreadYielding, ThreadRunGHC) \
90 jump stg_returnToSched;
92 #define BLOCK_BUT_FIRST(c) \
93 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
95 jump stg_returnToSchedButFirst;
/* YIELD_TO_INTERPRETER: yield back to the scheduler with what_next set to
 * ThreadInterpret, so the thread resumes in the byte-code interpreter
 * rather than in compiled code.  Jumps to stg_returnToSchedNotPaused
 * (defined elsewhere; presumably the variant of the return path that
 * skips the "paused" bookkeeping — confirm against the RTS return code).
 */
97 #define YIELD_TO_INTERPRETER \
98 PRE_RETURN(ThreadYielding, ThreadInterpret) \
99 jump stg_returnToSchedNotPaused;
101 /* -----------------------------------------------------------------------------
102 Heap checks in thunks/functions.
104 In these cases, node always points to the function closure. This gives
105 us an easy way to return to the function: just leave R1 on the top of
106 the stack, and have the scheduler enter it to return.
108 There are canned sequences for 'n' pointer values in registers.
109 -------------------------------------------------------------------------- */
111 INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused)
122 Sp(0) = stg_enter_info;
128 ToDo: merge the block and yield macros, calling something like BLOCK(N)
133 Should we actually ever do a yield in such a case?? -- HWL
138 TSO_what_next(CurrentTSO) = ThreadRunGHC;
148 TSO_what_next(CurrentTSO) = ThreadRunGHC;
153 /*- 2 Regs--------------------------------------------------------------------*/
161 TSO_what_next(CurrentTSO) = ThreadRunGHC;
166 /*- 3 Regs -------------------------------------------------------------------*/
175 TSO_what_next(CurrentTSO) = ThreadRunGHC;
180 /*- 4 Regs -------------------------------------------------------------------*/
190 TSO_what_next(CurrentTSO) = ThreadRunGHC;
195 /*- 5 Regs -------------------------------------------------------------------*/
206 TSO_what_next(CurrentTSO) = ThreadRunGHC;
211 /*- 6 Regs -------------------------------------------------------------------*/
223 TSO_what_next(CurrentTSO) = ThreadRunGHC;
228 /*- 7 Regs -------------------------------------------------------------------*/
241 TSO_what_next(CurrentTSO) = ThreadRunGHC;
246 /*- 8 Regs -------------------------------------------------------------------*/
260 TSO_what_next(CurrentTSO) = ThreadRunGHC;
265 // the same routines but with a block rather than a yield
272 TSO_what_next(CurrentTSO) = ThreadRunGHC;
277 /*- 2 Regs--------------------------------------------------------------------*/
285 TSO_what_next(CurrentTSO) = ThreadRunGHC;
290 /*- 3 Regs -------------------------------------------------------------------*/
299 TSO_what_next(CurrentTSO) = ThreadRunGHC;
304 /*- 4 Regs -------------------------------------------------------------------*/
314 TSO_what_next(CurrentTSO) = ThreadRunGHC;
319 /*- 5 Regs -------------------------------------------------------------------*/
330 TSO_what_next(CurrentTSO) = ThreadRunGHC;
335 /*- 6 Regs -------------------------------------------------------------------*/
347 TSO_what_next(CurrentTSO) = ThreadRunGHC;
352 /*- 7 Regs -------------------------------------------------------------------*/
365 TSO_what_next(CurrentTSO) = ThreadRunGHC;
370 /*- 8 Regs -------------------------------------------------------------------*/
384 TSO_what_next(CurrentTSO) = ThreadRunGHC;
391 #if 0 && defined(PAR)
394 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
395 saving of the thread state from the actual jump via an StgReturn.
396 We need this separation because we call RTS routines in blocking entry codes
397 before jumping back into the RTS (see parallel/FetchMe.hc).
409 TSO_what_next(CurrentTSO) = ThreadRunGHC;
416 /* -----------------------------------------------------------------------------
417 Heap checks in Primitive case alternatives
419 A primitive case alternative is entered with a value either in
420 R1, FloatReg1 or D1 depending on the return convention. All the
421 cases are covered below.
422 -------------------------------------------------------------------------- */
424 /*-- No Registers live ------------------------------------------------------ */
431 /*-- void return ------------------------------------------------------------ */
433 INFO_TABLE_RET( stg_gc_void, RET_SMALL)
436 jump %ENTRY_CODE(Sp(0));
439 /*-- R1 is boxed/unpointed -------------------------------------------------- */
441 INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused)
445 jump %ENTRY_CODE(Sp(0));
452 Sp(0) = stg_gc_unpt_r1_info;
456 /*-- R1 is unboxed -------------------------------------------------- */
458 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
459 INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
463 jump %ENTRY_CODE(Sp(0));
470 Sp(0) = stg_gc_unbx_r1_info;
474 /*-- F1 contains a float ------------------------------------------------- */
476 INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
480 jump %ENTRY_CODE(Sp(0));
486 F_[Sp + WDS(1)] = F1;
487 Sp(0) = stg_gc_f1_info;
491 /*-- D1 contains a double ------------------------------------------------- */
493 INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
495 D1 = D_[Sp + WDS(1)];
496 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
497 jump %ENTRY_CODE(Sp(0));
502 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
503 D_[Sp + WDS(1)] = D1;
504 Sp(0) = stg_gc_d1_info;
509 /*-- L1 contains an int64 ------------------------------------------------- */
511 INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
513 L1 = L_[Sp + WDS(1)];
514 Sp_adj(1) + SIZEOF_StgWord64;
515 jump %ENTRY_CODE(Sp(0));
520 Sp_adj(-1) - SIZEOF_StgWord64;
521 L_[Sp + WDS(1)] = L1;
522 Sp(0) = stg_gc_l1_info;
526 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
528 INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused )
531 // one ptr is on the stack (Sp(0))
532 jump %ENTRY_CODE(Sp(1));
535 /* -----------------------------------------------------------------------------
536 Generic function entry heap check code.
538 At a function entry point, the arguments are as per the calling convention,
539 i.e. some in regs and some on the stack. There may or may not be
540 a pointer to the function closure in R1 - if there isn't, then the heap
541 check failure code in the function will arrange to load it.
543 The function's argument types are described in its info table, so we
544 can just jump to this bit of generic code to save away all the
545 registers and return to the scheduler.
547 This code arranges the stack like this:
551 +---------------------+
553 +---------------------+
555 +---------------------+
557 +---------------------+
559 The size is the number of words of arguments on the stack, and is cached
560 in the frame in order to simplify stack walking: otherwise the size of
561 this stack frame would have to be calculated by looking at f's info table.
563 -------------------------------------------------------------------------- */
571 info = %GET_FUN_INFO(R1);
574 type = TO_W_(StgFunInfoExtra_fun_type(info));
575 if (type == ARG_GEN) {
576 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
578 if (type == ARG_GEN_BIG) {
579 #ifdef TABLES_NEXT_TO_CODE
580 // bitmap field holds an offset
581 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
582 + %GET_ENTRY(R1) /* ### */ );
584 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
587 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
592 // we don't have to save any registers away
596 Sp(0) = stg_gc_fun_info;
600 type = TO_W_(StgFunInfoExtra_fun_type(info));
602 if (type == ARG_GEN || type == ARG_GEN_BIG) {
603 // regs already saved by the heap check code
607 Sp(0) = stg_gc_fun_info;
608 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
611 jump W_[stg_stack_save_entries + WDS(type)];
612 // jumps to stg_gc_noregs after saving stuff
614 #endif /* !NO_ARG_REGS */
617 /* -----------------------------------------------------------------------------
618 Generic Apply (return point)
620 The dual to stg_fun_gc_gen (above): this fragment returns to the
621 function, passing arguments in the stack and in registers
622 appropriately. The stack layout is given above.
623 -------------------------------------------------------------------------- */
625 INFO_TABLE_RET( stg_gc_fun, RET_FUN )
630 // Minor optimisation: there are no argument registers to load up,
631 // so we can just jump straight to the function's entry point.
637 info = %GET_FUN_INFO(R1);
638 type = TO_W_(StgFunInfoExtra_fun_type(info));
639 if (type == ARG_GEN || type == ARG_GEN_BIG) {
640 jump StgFunInfoExtra_slow_apply(info);
642 if (type == ARG_BCO) {
643 // cover this case just to be on the safe side
646 Sp(0) = stg_apply_interp_info;
647 jump stg_yield_to_interpreter;
649 jump W_[stg_ap_stack_entries + WDS(type)];
655 /* -----------------------------------------------------------------------------
656 Generic Heap Check Code.
658 Called with Liveness mask in R9, Return address in R10.
659 Stack must be consistent (containing all necessary info pointers
662 See StgMacros.h for a description of the RET_DYN stack frame.
664 We also define an stg_gen_yield here, because it's very similar.
665 -------------------------------------------------------------------------- */
667 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
668 // on a 64-bit machine, we'll end up wasting a couple of words, but
669 // it's not a big deal.
671 #define RESTORE_EVERYTHING \
672 L1 = L_[Sp + WDS(19)]; \
673 D2 = D_[Sp + WDS(17)]; \
674 D1 = D_[Sp + WDS(15)]; \
675 F4 = F_[Sp + WDS(14)]; \
676 F3 = F_[Sp + WDS(13)]; \
677 F2 = F_[Sp + WDS(12)]; \
678 F1 = F_[Sp + WDS(11)]; \
/* Word offset at which the real return address sits relative to Sp, used
 * by stg_gc_gen's `jump Sp(RET_OFFSET)` below.  Negative because it is
 * measured after the RET_DYN frame has been popped.  NOTE(review): this
 * must stay in sync with the frame layout built by SAVE_EVERYTHING
 * (which stores the return address near the top of a 21-word-ish frame);
 * verify against the full SAVE_EVERYTHING definition before changing.
 */
689 #define RET_OFFSET (-19)
691 #define SAVE_EVERYTHING \
693 L_[Sp + WDS(19)] = L1; \
694 D_[Sp + WDS(17)] = D2; \
695 D_[Sp + WDS(15)] = D1; \
696 F_[Sp + WDS(14)] = F4; \
697 F_[Sp + WDS(13)] = F3; \
698 F_[Sp + WDS(12)] = F2; \
699 F_[Sp + WDS(11)] = F1; \
708 Sp(2) = R10; /* return address */ \
709 Sp(1) = R9; /* liveness mask */ \
710 Sp(0) = stg_gc_gen_info;
712 INFO_TABLE_RET( stg_gc_gen, RET_DYN )
713 /* bitmap in the above info table is unused, the real one is on the stack. */
716 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
725 // A heap check at an unboxed tuple return point. The return address
726 // is on the stack, and we can find it by using the offsets given
727 // to us in the liveness mask.
730 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
736 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
737 * because we've just failed doYouWantToGC(), not a standard heap
738 * check. GC_GENERIC would end up returning StackOverflow.
746 /* -----------------------------------------------------------------------------
748 -------------------------------------------------------------------------- */
761 /* -----------------------------------------------------------------------------
762 Yielding to the interpreter... top of stack says what to do next.
763 -------------------------------------------------------------------------- */
765 stg_yield_to_interpreter
767 YIELD_TO_INTERPRETER;
770 /* -----------------------------------------------------------------------------
772 -------------------------------------------------------------------------- */
789 Sp(0) = stg_enter_info;
793 /* -----------------------------------------------------------------------------
794 * takeMVar/putMVar-specific blocks
796 * Stack layout for a thread blocked in takeMVar:
800 * stg_block_takemvar_info
802 * Stack layout for a thread blocked in putMVar:
807 * stg_block_putmvar_info
809 * See PrimOps.hc for a description of the workings of take/putMVar.
811 * -------------------------------------------------------------------------- */
813 INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused )
817 jump takeMVarzh_fast;
820 // code fragment executed just before we return to the scheduler
821 stg_block_takemvar_finally
824 unlockClosure(R3, stg_EMPTY_MVAR_info);
833 Sp(0) = stg_block_takemvar_info;
835 BLOCK_BUT_FIRST(stg_block_takemvar_finally);
838 INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 )
846 // code fragment executed just before we return to the scheduler
847 stg_block_putmvar_finally
850 unlockClosure(R3, stg_FULL_MVAR_info);
860 Sp(0) = stg_block_putmvar_info;
862 BLOCK_BUT_FIRST(stg_block_putmvar_finally);
865 // code fragment executed just before we return to the scheduler
866 stg_block_blackhole_finally
868 #if defined(THREADED_RTS)
869 // The last thing we do is release sched_lock, which is
870 // preventing other threads from accessing blackhole_queue and
871 // picking up this thread before we are finished with it.
872 foreign "C" RELEASE_LOCK(sched_mutex "ptr");
881 Sp(0) = stg_enter_info;
882 BLOCK_BUT_FIRST(stg_block_blackhole_finally);
885 INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused )
890 jump killThreadzh_fast;
893 stg_block_throwto_finally
896 foreign "C" throwToReleaseTarget (R3 "ptr");
906 Sp(0) = stg_block_throwto_info;
907 BLOCK_BUT_FIRST(stg_block_throwto_finally);
910 #ifdef mingw32_HOST_OS
911 INFO_TABLE_RET( stg_block_async, RET_SMALL )
916 ares = StgTSO_block_info(CurrentTSO);
917 len = StgAsyncIOResult_len(ares);
918 errC = StgAsyncIOResult_errCode(ares);
919 StgTSO_block_info(CurrentTSO) = NULL;
920 foreign "C" free(ares "ptr");
923 jump %ENTRY_CODE(Sp(1));
929 Sp(0) = stg_block_async_info;
933 /* Used by threadDelay implementation; it would be desirable to get rid of
934 * this free()'ing void return continuation.
936 INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
940 ares = StgTSO_block_info(CurrentTSO);
941 StgTSO_block_info(CurrentTSO) = NULL;
942 foreign "C" free(ares "ptr");
944 jump %ENTRY_CODE(Sp(0));
950 Sp(0) = stg_block_async_void_info;
956 /* -----------------------------------------------------------------------------
958 -------------------------------------------------------------------------- */
960 stg_block_stmwait_finally
962 foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
968 BLOCK_BUT_FIRST(stg_block_stmwait_finally);