1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
15 /* Stack/Heap Check Failure
16 * ------------------------
18 * On discovering that a stack or heap check has failed, we do the following:
20 * - If the context_switch flag is set, indicating that there are more
21 * threads waiting to run, we yield to the scheduler
22 * (return ThreadYielding).
24 * - If Hp > HpLim, we've had a heap check failure. This means we've
25 * come to the end of the current heap block, so we try to chain
26 * another block on with ExtendNursery().
28 * - If this succeeds, we carry on without returning to the
31 * - If it fails, we return to the scheduler claiming HeapOverflow
32 * so that a garbage collection can be performed.
34 * - If Hp <= HpLim, it must have been a stack check that failed. In
35 * which case, we return to the scheduler claiming StackOverflow, the
36 * scheduler will either increase the size of our stack, or raise
37 * an exception if the stack is already too big.
39 * The effect of checking for context switch only in the heap/stack check
40 * failure code is that we'll switch threads after the current thread has
41 * reached the end of its heap block. If a thread isn't allocating
42 * at all, it won't yield. Hopefully this won't be a problem in practice.
 */
45 /* Remember that the return address is *removed* when returning to a
46 * ThreadRunGHC thread.
 */
50 DEBUG_ONLY(foreign "C" heapCheckFail()); \
52 Hp = Hp - HpAlloc/*in bytes*/; \
53 if (HpAlloc <= BLOCK_SIZE \
54 && bdescr_link(CurrentNursery) != NULL) { \
56 CurrentNursery = bdescr_link(CurrentNursery); \
58 if (CInt[context_switch] != 0 :: CInt) { \
59 R1 = ThreadYielding; \
62 jump %ENTRY_CODE(Sp(0)); \
72 SAVE_THREAD_STATE(); \
73 StgTSO_what_next(CurrentTSO) = ThreadRunGHC::I16; \
76 #define RETURN_TO_SCHED(why,what_next) \
77 SAVE_THREAD_STATE(); \
78 StgTSO_what_next(CurrentTSO) = what_next::I16; \
/* Canned return-to-scheduler sequences.  Each expands to RETURN_TO_SCHED
 * with a scheduler return code (why we stopped) and the mode the thread
 * should resume in (what_next).  The visible fragment of RETURN_TO_SCHED
 * shows it saves the thread state and stores what_next into the TSO;
 * NOTE(review): the rest of its body is elided here -- confirm it then
 * returns `why` to the scheduler via StgReturn.
 */
82 #define HP_GENERIC RETURN_TO_SCHED(HeapOverflow, ThreadRunGHC)
83 #define YIELD_GENERIC RETURN_TO_SCHED(ThreadYielding, ThreadRunGHC)
84 #define YIELD_TO_INTERPRETER RETURN_TO_SCHED(ThreadYielding, ThreadInterpret)
85 #define BLOCK_GENERIC RETURN_TO_SCHED(ThreadBlocked, ThreadRunGHC)
87 /* -----------------------------------------------------------------------------
88 Heap checks in thunks/functions.
90 In these cases, node always points to the function closure. This gives
91 us an easy way to return to the function: just leave R1 on the top of
92 the stack, and have the scheduler enter it to return.
94 There are canned sequences for 'n' pointer values in registers.
95 -------------------------------------------------------------------------- */
97 INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
108 Sp(0) = stg_enter_info;
/*
114 ToDo: merge the block and yield macros, calling something like BLOCK(N)
119 Should we actually ever do a yield in such a case?? -- HWL
*/
124 TSO_what_next(CurrentTSO) = ThreadRunGHC;
134 TSO_what_next(CurrentTSO) = ThreadRunGHC;
139 /*- 2 Regs--------------------------------------------------------------------*/
147 TSO_what_next(CurrentTSO) = ThreadRunGHC;
152 /*- 3 Regs -------------------------------------------------------------------*/
161 TSO_what_next(CurrentTSO) = ThreadRunGHC;
166 /*- 4 Regs -------------------------------------------------------------------*/
176 TSO_what_next(CurrentTSO) = ThreadRunGHC;
181 /*- 5 Regs -------------------------------------------------------------------*/
192 TSO_what_next(CurrentTSO) = ThreadRunGHC;
197 /*- 6 Regs -------------------------------------------------------------------*/
209 TSO_what_next(CurrentTSO) = ThreadRunGHC;
214 /*- 7 Regs -------------------------------------------------------------------*/
227 TSO_what_next(CurrentTSO) = ThreadRunGHC;
232 /*- 8 Regs -------------------------------------------------------------------*/
246 TSO_what_next(CurrentTSO) = ThreadRunGHC;
251 // the same routines but with a block rather than a yield
258 TSO_what_next(CurrentTSO) = ThreadRunGHC;
263 /*- 2 Regs--------------------------------------------------------------------*/
271 TSO_what_next(CurrentTSO) = ThreadRunGHC;
276 /*- 3 Regs -------------------------------------------------------------------*/
285 TSO_what_next(CurrentTSO) = ThreadRunGHC;
290 /*- 4 Regs -------------------------------------------------------------------*/
300 TSO_what_next(CurrentTSO) = ThreadRunGHC;
305 /*- 5 Regs -------------------------------------------------------------------*/
316 TSO_what_next(CurrentTSO) = ThreadRunGHC;
321 /*- 6 Regs -------------------------------------------------------------------*/
333 TSO_what_next(CurrentTSO) = ThreadRunGHC;
338 /*- 7 Regs -------------------------------------------------------------------*/
351 TSO_what_next(CurrentTSO) = ThreadRunGHC;
356 /*- 8 Regs -------------------------------------------------------------------*/
370 TSO_what_next(CurrentTSO) = ThreadRunGHC;
377 #if 0 && defined(PAR)
380 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
381 saving of the thread state from the actual jump via an StgReturn.
382 We need this separation because we call RTS routines in blocking entry codes
383 before jumping back into the RTS (see parallel/FetchMe.hc).
395 TSO_what_next(CurrentTSO) = ThreadRunGHC;
402 /* -----------------------------------------------------------------------------
403 Heap checks in Primitive case alternatives
405 A primitive case alternative is entered with a value either in
406 R1, FloatReg1 or D1 depending on the return convention. All the
407 cases are covered below.
408 -------------------------------------------------------------------------- */
410 /*-- No Registers live ------------------------------------------------------ */
417 /*-- void return ------------------------------------------------------------ */
419 INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL)
422 jump %ENTRY_CODE(Sp(0));
425 /*-- R1 is boxed/unpointed -------------------------------------------------- */
427 INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
431 jump %ENTRY_CODE(Sp(0));
438 Sp(0) = stg_gc_unpt_r1_info;
442 /*-- R1 is unboxed -------------------------------------------------- */
444 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
445 INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
449 jump %ENTRY_CODE(Sp(0));
456 Sp(0) = stg_gc_unbx_r1_info;
460 /*-- F1 contains a float ------------------------------------------------- */
462 INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
466 jump %ENTRY_CODE(Sp(0));
472 F_[Sp + WDS(1)] = F1;
473 Sp(0) = stg_gc_f1_info;
477 /*-- D1 contains a double ------------------------------------------------- */
479 /* we support doubles of either 1 or 2 words in size */
481 #if SIZEOF_DOUBLE == SIZEOF_VOID_P
482 # define DBL_BITMAP 1
485 # define DBL_BITMAP 3
489 INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL )
491 D1 = D_[Sp + WDS(1)];
492 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
493 jump %ENTRY_CODE(Sp(0));
498 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
499 D_[Sp + WDS(1)] = D1;
500 Sp(0) = stg_gc_d1_info;
505 /*-- L1 contains an int64 ------------------------------------------------- */
507 /* we support int64s of either 1 or 2 words in size */
509 #if SIZEOF_VOID_P == 8
510 # define LLI_BITMAP 1
513 # define LLI_BITMAP 3
517 INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL )
519 L1 = L_[Sp + WDS(1)];
520 Sp_adj(1) + SIZEOF_StgWord64;
521 jump %ENTRY_CODE(Sp(0));
526 Sp_adj(-1) - SIZEOF_StgWord64;
527 L_[Sp + WDS(1)] = L1;
528 Sp(0) = stg_gc_l1_info;
532 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
534 INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL )
537 // one ptr is on the stack (Sp(0))
538 jump %ENTRY_CODE(Sp(1));
541 /* -----------------------------------------------------------------------------
542 Generic function entry heap check code.
544 At a function entry point, the arguments are as per the calling convention,
545 i.e. some in regs and some on the stack. There may or may not be
546 a pointer to the function closure in R1 - if there isn't, then the heap
547 check failure code in the function will arrange to load it.
549 The function's argument types are described in its info table, so we
550 can just jump to this bit of generic code to save away all the
551 registers and return to the scheduler.
553 This code arranges the stack like this:
557 +---------------------+
559 +---------------------+
561 +---------------------+
563 +---------------------+
565 The size is the number of words of arguments on the stack, and is cached
566 in the frame in order to simplify stack walking: otherwise the size of
567 this stack frame would have to be calculated by looking at f's info table.
569 -------------------------------------------------------------------------- */
577 info = %GET_FUN_INFO(R1);
580 type = TO_W_(StgFunInfoExtra_fun_type(info));
581 if (type == ARG_GEN) {
582 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
584 if (type == ARG_GEN_BIG) {
585 #ifdef TABLES_NEXT_TO_CODE
586 // bitmap field holds an offset
587 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
588 + %GET_ENTRY(R1) /* ### */ );
590 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
593 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
598 // we don't have to save any registers away
602 Sp(0) = stg_gc_fun_info;
606 type = TO_W_(StgFunInfoExtra_fun_type(info));
608 if (type == ARG_GEN || type == ARG_GEN_BIG) {
609 // regs already saved by the heap check code
613 Sp(0) = stg_gc_fun_info;
614 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
617 jump W_[stg_stack_save_entries + WDS(type)];
618 // jumps to stg_gc_noregs after saving stuff
620 #endif /* !NO_ARG_REGS */
623 /* -----------------------------------------------------------------------------
624 Generic Apply (return point)
626 The dual to stg_fun_gc_gen (above): this fragment returns to the
627 function, passing arguments in the stack and in registers
628 appropriately. The stack layout is given above.
629 -------------------------------------------------------------------------- */
631 INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN )
636 // Minor optimisation: there are no argument registers to load up,
637 // so we can just jump straight to the function's entry point.
643 info = %GET_FUN_INFO(R1);
644 type = TO_W_(StgFunInfoExtra_fun_type(info));
645 if (type == ARG_GEN || type == ARG_GEN_BIG) {
646 jump StgFunInfoExtra_slow_apply(info);
648 if (type == ARG_BCO) {
649 // cover this case just to be on the safe side
652 Sp(0) = stg_apply_interp_info;
653 jump stg_yield_to_interpreter;
655 jump W_[stg_ap_stack_entries + WDS(type)];
661 /* -----------------------------------------------------------------------------
662 Generic Heap Check Code.
664 Called with Liveness mask in R9, Return address in R10.
665 Stack must be consistent (containing all necessary info pointers
668 See StgMacros.h for a description of the RET_DYN stack frame.
670 We also define an stg_gen_yield here, because it's very similar.
671 -------------------------------------------------------------------------- */
673 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
674 // on a 64-bit machine, we'll end up wasting a couple of words, but
675 // it's not a big deal.
677 #define RESTORE_EVERYTHING \
678 L1 = L_[Sp + WDS(19)]; \
679 D2 = D_[Sp + WDS(17)]; \
680 D1 = D_[Sp + WDS(15)]; \
681 F4 = F_[Sp + WDS(14)]; \
682 F3 = F_[Sp + WDS(13)]; \
683 F2 = F_[Sp + WDS(12)]; \
684 F1 = F_[Sp + WDS(11)]; \
/* Word offset of the saved return address within the SAVE_EVERYTHING
 * frame, relative to Sp after the frame is popped back off; used as
 * `jump Sp(RET_OFFSET)` in stg_gc_gen.  NOTE(review): the full frame
 * layout is partly elided here -- confirm the value against the
 * complete SAVE_EVERYTHING definition. */
695 #define RET_OFFSET (-19)
697 #define SAVE_EVERYTHING \
699 L_[Sp + WDS(19)] = L1; \
700 D_[Sp + WDS(17)] = D2; \
701 D_[Sp + WDS(15)] = D1; \
702 F_[Sp + WDS(14)] = F4; \
703 F_[Sp + WDS(13)] = F3; \
704 F_[Sp + WDS(12)] = F2; \
705 F_[Sp + WDS(11)] = F1; \
714 Sp(2) = R10; /* return address */ \
715 Sp(1) = R9; /* liveness mask */ \
716 Sp(0) = stg_gc_gen_info;
718 INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN )
719 /* bitmap in the above info table is unused, the real one is on the stack. */
722 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
731 // A heap check at an unboxed tuple return point. The return address
732 // is on the stack, and we can find it by using the offsets given
733 // to us in the liveness mask.
736 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
/*
742 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
743 * because we've just failed doYouWantToGC(), not a standard heap
744 * check. GC_GENERIC would end up returning StackOverflow.
 */
752 /* -----------------------------------------------------------------------------
754 -------------------------------------------------------------------------- */
767 /* -----------------------------------------------------------------------------
768 Yielding to the interpreter... top of stack says what to do next.
769 -------------------------------------------------------------------------- */
771 stg_yield_to_interpreter
773 YIELD_TO_INTERPRETER;
776 /* -----------------------------------------------------------------------------
778 -------------------------------------------------------------------------- */
795 Sp(0) = stg_enter_info;
799 /* -----------------------------------------------------------------------------
800 * takeMVar/putMVar-specific blocks
802 * Stack layout for a thread blocked in takeMVar:
806 * stg_block_takemvar_info
808 * Stack layout for a thread blocked in putMVar:
813 * stg_block_putmvar_info
815 * See PrimOps.hc for a description of the workings of take/putMVar.
817 * -------------------------------------------------------------------------- */
819 INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL )
823 jump takeMVarzh_fast;
830 Sp(0) = stg_block_takemvar_info;
834 INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL )
847 Sp(0) = stg_block_putmvar_info;
851 #ifdef mingw32_HOST_OS
852 INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
857 ares = StgTSO_block_info(CurrentTSO);
858 len = StgAsyncIOResult_len(ares);
859 errC = StgAsyncIOResult_errCode(ares);
860 StgTSO_block_info(CurrentTSO) = NULL;
861 foreign "C" free(ares "ptr");
864 jump %ENTRY_CODE(Sp(1));
870 Sp(0) = stg_block_async_info;
874 /* Used by threadDelay implementation; it would be desirable to get rid of
875 * this free()'ing void return continuation.
 */
877 INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
881 ares = StgTSO_block_info(CurrentTSO);
882 StgTSO_block_info(CurrentTSO) = NULL;
883 foreign "C" free(ares "ptr");
885 jump %ENTRY_CODE(Sp(0));
891 Sp(0) = stg_block_async_void_info;