1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
15 /* Stack/Heap Check Failure
16 * ------------------------
18 * On discovering that a stack or heap check has failed, we do the following:
20 * - If the context_switch flag is set, indicating that there are more
21 * threads waiting to run, we yield to the scheduler
22 * (return ThreadYielding).
24 * - If Hp > HpLim, we've had a heap check failure. This means we've
25 * come to the end of the current heap block, so we try to chain
26 * another block on with ExtendNursery().
28 * - If this succeeds, we carry on without returning to the scheduler.
31 * - If it fails, we return to the scheduler claiming HeapOverflow
32 * so that a garbage collection can be performed.
34 * - If Hp <= HpLim, it must have been a stack check that failed. In
35 * which case, we return to the scheduler claiming StackOverflow, the
36 * scheduler will either increase the size of our stack, or raise
37 * an exception if the stack is already too big.
39 * The effect of checking for context switch only in the heap/stack check
40 * failure code is that we'll switch threads after the current thread has
41 * reached the end of its heap block. If a thread isn't allocating
42 * at all, it won't yield. Hopefully this won't be a problem in practice.
45 #define PRE_RETURN(why,what_next) \
46 StgTSO_what_next(CurrentTSO) = what_next::I16; \
47 StgRegTable_rRet(BaseReg) = why; \
50 /* Remember that the return address is *removed* when returning to a
51 * ThreadRunGHC thread.
55 DEBUG_ONLY(foreign "C" heapCheckFail()); \
57 Hp = Hp - HpAlloc/*in bytes*/; \
58 if (HpAlloc <= BLOCK_SIZE \
59 && bdescr_link(CurrentNursery) != NULL) { \
61 CurrentNursery = bdescr_link(CurrentNursery); \
63 if (CInt[context_switch] != 0 :: CInt) { \
64 R1 = ThreadYielding; \
67 jump %ENTRY_CODE(Sp(0)); \
77 PRE_RETURN(R1,ThreadRunGHC); \
78 jump stg_returnToSched;
81 PRE_RETURN(HeapOverflow, ThreadRunGHC) \
82 jump stg_returnToSched;
/* Return to the scheduler with the current thread marked as blocked.
 * Uses PRE_RETURN (defined earlier in this file) to record the return
 * reason (ThreadBlocked) in the register table and to set the TSO's
 * what_next field to ThreadRunGHC, so the thread resumes as compiled
 * code when it is later woken, then jumps to stg_returnToSched.
 * NOTE(review): stg_returnToSched is defined outside this chunk —
 * presumably it saves the thread state and returns to the scheduler;
 * confirm against the RTS. */
84 #define BLOCK_GENERIC \
85 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
86 jump stg_returnToSched;
/* Voluntarily yield to the scheduler.  PRE_RETURN records the return
 * reason (ThreadYielding) in the register table and sets the TSO's
 * what_next field to ThreadRunGHC, so the thread will be resumed as
 * compiled code; control then transfers to stg_returnToSched.
 * The file-header comment above notes a yield like this is taken on
 * heap/stack-check failure when the context_switch flag is set. */
88 #define YIELD_GENERIC \
89 PRE_RETURN(ThreadYielding, ThreadRunGHC) \
90 jump stg_returnToSched;
92 #define BLOCK_BUT_FIRST(c) \
93 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
95 jump stg_returnToSchedButFirst;
/* Yield to the scheduler, arranging for the thread to be resumed in
 * the bytecode interpreter rather than as compiled code: PRE_RETURN
 * records ThreadYielding as the return reason and sets what_next to
 * ThreadInterpret.  Jumps to stg_returnToSchedNotPaused — defined
 * outside this chunk; the "NotPaused" variant presumably skips the
 * pause/suspend bookkeeping of stg_returnToSched — TODO confirm. */
97 #define YIELD_TO_INTERPRETER \
98 PRE_RETURN(ThreadYielding, ThreadInterpret) \
99 jump stg_returnToSchedNotPaused;
101 /* -----------------------------------------------------------------------------
102 Heap checks in thunks/functions.
104 In these cases, node always points to the function closure. This gives
105 us an easy way to return to the function: just leave R1 on the top of
106 the stack, and have the scheduler enter it to return.
108 There are canned sequences for 'n' pointer values in registers.
109 -------------------------------------------------------------------------- */
111 INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
122 Sp(0) = stg_enter_info;
128 ToDo: merge the block and yield macros, calling something like BLOCK(N)
133 Should we actually ever do a yield in such a case?? -- HWL
138 TSO_what_next(CurrentTSO) = ThreadRunGHC;
148 TSO_what_next(CurrentTSO) = ThreadRunGHC;
153 /*- 2 Regs -------------------------------------------------------------------*/
161 TSO_what_next(CurrentTSO) = ThreadRunGHC;
166 /*- 3 Regs -------------------------------------------------------------------*/
175 TSO_what_next(CurrentTSO) = ThreadRunGHC;
180 /*- 4 Regs -------------------------------------------------------------------*/
190 TSO_what_next(CurrentTSO) = ThreadRunGHC;
195 /*- 5 Regs -------------------------------------------------------------------*/
206 TSO_what_next(CurrentTSO) = ThreadRunGHC;
211 /*- 6 Regs -------------------------------------------------------------------*/
223 TSO_what_next(CurrentTSO) = ThreadRunGHC;
228 /*- 7 Regs -------------------------------------------------------------------*/
241 TSO_what_next(CurrentTSO) = ThreadRunGHC;
246 /*- 8 Regs -------------------------------------------------------------------*/
260 TSO_what_next(CurrentTSO) = ThreadRunGHC;
265 // the same routines but with a block rather than a yield
272 TSO_what_next(CurrentTSO) = ThreadRunGHC;
277 /*- 2 Regs -------------------------------------------------------------------*/
285 TSO_what_next(CurrentTSO) = ThreadRunGHC;
290 /*- 3 Regs -------------------------------------------------------------------*/
299 TSO_what_next(CurrentTSO) = ThreadRunGHC;
304 /*- 4 Regs -------------------------------------------------------------------*/
314 TSO_what_next(CurrentTSO) = ThreadRunGHC;
319 /*- 5 Regs -------------------------------------------------------------------*/
330 TSO_what_next(CurrentTSO) = ThreadRunGHC;
335 /*- 6 Regs -------------------------------------------------------------------*/
347 TSO_what_next(CurrentTSO) = ThreadRunGHC;
352 /*- 7 Regs -------------------------------------------------------------------*/
365 TSO_what_next(CurrentTSO) = ThreadRunGHC;
370 /*- 8 Regs -------------------------------------------------------------------*/
384 TSO_what_next(CurrentTSO) = ThreadRunGHC;
391 #if 0 && defined(PAR)
394 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
395 saving of the thread state from the actual jump via an StgReturn.
396 We need this separation because we call RTS routines in blocking entry codes
397 before jumping back into the RTS (see parallel/FetchMe.hc).
409 TSO_what_next(CurrentTSO) = ThreadRunGHC;
416 /* -----------------------------------------------------------------------------
417 Heap checks in Primitive case alternatives
419 A primitive case alternative is entered with a value either in
420 R1, FloatReg1 or D1 depending on the return convention. All the
421 cases are covered below.
422 -------------------------------------------------------------------------- */
424 /*-- No Registers live ------------------------------------------------------ */
431 /*-- void return ------------------------------------------------------------ */
433 INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL)
436 jump %ENTRY_CODE(Sp(0));
439 /*-- R1 is boxed/unpointed -------------------------------------------------- */
441 INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
445 jump %ENTRY_CODE(Sp(0));
452 Sp(0) = stg_gc_unpt_r1_info;
456 /*-- R1 is unboxed -------------------------------------------------- */
458 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
459 INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
463 jump %ENTRY_CODE(Sp(0));
470 Sp(0) = stg_gc_unbx_r1_info;
474 /*-- F1 contains a float ------------------------------------------------- */
476 INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
480 jump %ENTRY_CODE(Sp(0));
486 F_[Sp + WDS(1)] = F1;
487 Sp(0) = stg_gc_f1_info;
491 /*-- D1 contains a double ------------------------------------------------- */
493 /* we support doubles of either 1 or 2 words in size */
495 #if SIZEOF_DOUBLE == SIZEOF_VOID_P
496 # define DBL_BITMAP 1
499 # define DBL_BITMAP 3
503 INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL )
505 D1 = D_[Sp + WDS(1)];
506 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
507 jump %ENTRY_CODE(Sp(0));
512 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
513 D_[Sp + WDS(1)] = D1;
514 Sp(0) = stg_gc_d1_info;
519 /*-- L1 contains an int64 ------------------------------------------------- */
521 /* we support int64s of either 1 or 2 words in size */
523 #if SIZEOF_VOID_P == 8
524 # define LLI_BITMAP 1
527 # define LLI_BITMAP 3
531 INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL )
533 L1 = L_[Sp + WDS(1)];
534 Sp_adj(1) + SIZEOF_StgWord64;
535 jump %ENTRY_CODE(Sp(0));
540 Sp_adj(-1) - SIZEOF_StgWord64;
541 L_[Sp + WDS(1)] = L1;
542 Sp(0) = stg_gc_l1_info;
546 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
548 INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL )
551 // one ptr is on the stack (Sp(0))
552 jump %ENTRY_CODE(Sp(1));
555 /* -----------------------------------------------------------------------------
556 Generic function entry heap check code.
558 At a function entry point, the arguments are as per the calling convention,
559 i.e. some in regs and some on the stack. There may or may not be
560 a pointer to the function closure in R1 - if there isn't, then the heap
561 check failure code in the function will arrange to load it.
563 The function's argument types are described in its info table, so we
564 can just jump to this bit of generic code to save away all the
565 registers and return to the scheduler.
567 This code arranges the stack like this:
571 +---------------------+
573 +---------------------+
575 +---------------------+
577 +---------------------+
579 The size is the number of words of arguments on the stack, and is cached
580 in the frame in order to simplify stack walking: otherwise the size of
581 this stack frame would have to be calculated by looking at f's info table.
583 -------------------------------------------------------------------------- */
591 info = %GET_FUN_INFO(R1);
594 type = TO_W_(StgFunInfoExtra_fun_type(info));
595 if (type == ARG_GEN) {
596 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
598 if (type == ARG_GEN_BIG) {
599 #ifdef TABLES_NEXT_TO_CODE
600 // bitmap field holds an offset
601 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
602 + %GET_ENTRY(R1) /* ### */ );
604 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
607 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
612 // we don't have to save any registers away
616 Sp(0) = stg_gc_fun_info;
620 type = TO_W_(StgFunInfoExtra_fun_type(info));
622 if (type == ARG_GEN || type == ARG_GEN_BIG) {
623 // regs already saved by the heap check code
627 Sp(0) = stg_gc_fun_info;
628 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
631 jump W_[stg_stack_save_entries + WDS(type)];
632 // jumps to stg_gc_noregs after saving stuff
634 #endif /* !NO_ARG_REGS */
637 /* -----------------------------------------------------------------------------
638 Generic Apply (return point)
640 The dual to stg_fun_gc_gen (above): this fragment returns to the
641 function, passing arguments in the stack and in registers
642 appropriately. The stack layout is given above.
643 -------------------------------------------------------------------------- */
645 INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN )
650 // Minor optimisation: there are no argument registers to load up,
651 // so we can just jump straight to the function's entry point.
657 info = %GET_FUN_INFO(R1);
658 type = TO_W_(StgFunInfoExtra_fun_type(info));
659 if (type == ARG_GEN || type == ARG_GEN_BIG) {
660 jump StgFunInfoExtra_slow_apply(info);
662 if (type == ARG_BCO) {
663 // cover this case just to be on the safe side
666 Sp(0) = stg_apply_interp_info;
667 jump stg_yield_to_interpreter;
669 jump W_[stg_ap_stack_entries + WDS(type)];
675 /* -----------------------------------------------------------------------------
676 Generic Heap Check Code.
678 Called with Liveness mask in R9, Return address in R10.
679 Stack must be consistent (containing all necessary info pointers
682 See StgMacros.h for a description of the RET_DYN stack frame.
684 We also define an stg_gen_yield here, because it's very similar.
685 -------------------------------------------------------------------------- */
687 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
688 // on a 64-bit machine, we'll end up wasting a couple of words, but
689 // it's not a big deal.
691 #define RESTORE_EVERYTHING \
692 L1 = L_[Sp + WDS(19)]; \
693 D2 = D_[Sp + WDS(17)]; \
694 D1 = D_[Sp + WDS(15)]; \
695 F4 = F_[Sp + WDS(14)]; \
696 F3 = F_[Sp + WDS(13)]; \
697 F2 = F_[Sp + WDS(12)]; \
698 F1 = F_[Sp + WDS(11)]; \
709 #define RET_OFFSET (-19)
711 #define SAVE_EVERYTHING \
713 L_[Sp + WDS(19)] = L1; \
714 D_[Sp + WDS(17)] = D2; \
715 D_[Sp + WDS(15)] = D1; \
716 F_[Sp + WDS(14)] = F4; \
717 F_[Sp + WDS(13)] = F3; \
718 F_[Sp + WDS(12)] = F2; \
719 F_[Sp + WDS(11)] = F1; \
728 Sp(2) = R10; /* return address */ \
729 Sp(1) = R9; /* liveness mask */ \
730 Sp(0) = stg_gc_gen_info;
732 INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN )
733 /* bitmap in the above info table is unused, the real one is on the stack. */
736 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
745 // A heap check at an unboxed tuple return point. The return address
746 // is on the stack, and we can find it by using the offsets given
747 // to us in the liveness mask.
750 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
756 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
757 * because we've just failed doYouWantToGC(), not a standard heap
758 * check. GC_GENERIC would end up returning StackOverflow.
766 /* -----------------------------------------------------------------------------
768 -------------------------------------------------------------------------- */
781 /* -----------------------------------------------------------------------------
782 Yielding to the interpreter... top of stack says what to do next.
783 -------------------------------------------------------------------------- */
785 stg_yield_to_interpreter
787 YIELD_TO_INTERPRETER;
790 /* -----------------------------------------------------------------------------
792 -------------------------------------------------------------------------- */
809 Sp(0) = stg_enter_info;
813 /* -----------------------------------------------------------------------------
814 * takeMVar/putMVar-specific blocks
816 * Stack layout for a thread blocked in takeMVar:
820 * stg_block_takemvar_info
822 * Stack layout for a thread blocked in putMVar:
827 * stg_block_putmvar_info
829 * See PrimOps.hc for a description of the workings of take/putMVar.
831 * -------------------------------------------------------------------------- */
833 INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL )
837 jump takeMVarzh_fast;
840 // code fragment executed just before we return to the scheduler
841 stg_block_takemvar_finally
844 foreign "C" unlockClosure(R3 "ptr", stg_EMPTY_MVAR_info);
853 Sp(0) = stg_block_takemvar_info;
855 BLOCK_BUT_FIRST(stg_block_takemvar_finally);
858 INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL )
866 // code fragment executed just before we return to the scheduler
867 stg_block_putmvar_finally
870 foreign "C" unlockClosure(R3 "ptr", stg_FULL_MVAR_info);
880 Sp(0) = stg_block_putmvar_info;
882 BLOCK_BUT_FIRST(stg_block_putmvar_finally);
885 // code fragment executed just before we return to the scheduler
886 stg_block_blackhole_finally
889 // The last thing we do is release sched_lock, which is
890 // preventing other threads from accessing blackhole_queue and
891 // picking up this thread before we are finished with it.
892 foreign "C" RELEASE_LOCK(sched_mutex "ptr");
901 Sp(0) = stg_enter_info;
902 BLOCK_BUT_FIRST(stg_block_blackhole_finally);
905 #ifdef mingw32_HOST_OS
906 INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
911 ares = StgTSO_block_info(CurrentTSO);
912 len = StgAsyncIOResult_len(ares);
913 errC = StgAsyncIOResult_errCode(ares);
914 StgTSO_block_info(CurrentTSO) = NULL;
915 foreign "C" free(ares "ptr");
918 jump %ENTRY_CODE(Sp(1));
924 Sp(0) = stg_block_async_info;
928 /* Used by threadDelay implementation; it would be desirable to get rid of
929 * this free()'ing void return continuation.
931 INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
935 ares = StgTSO_block_info(CurrentTSO);
936 StgTSO_block_info(CurrentTSO) = NULL;
937 foreign "C" free(ares "ptr");
939 jump %ENTRY_CODE(Sp(0));
945 Sp(0) = stg_block_async_void_info;
951 /* -----------------------------------------------------------------------------
953 -------------------------------------------------------------------------- */
955 stg_block_stmwait_finally
957 foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
963 BLOCK_BUT_FIRST(stg_block_stmwait_finally);