1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
15 /* Stack/Heap Check Failure
16 * ------------------------
18 * On discovering that a stack or heap check has failed, we do the following:
20 * - If the context_switch flag is set, indicating that there are more
21 * threads waiting to run, we yield to the scheduler
22 * (return ThreadYielding).
24 * - If Hp > HpLim, we've had a heap check failure. This means we've
25 * come to the end of the current heap block, so we try to chain
26 * another block on with ExtendNursery().
 28 * - If this succeeds, we carry on without returning to the scheduler.
31 * - If it fails, we return to the scheduler claiming HeapOverflow
32 * so that a garbage collection can be performed.
34 * - If Hp <= HpLim, it must have been a stack check that failed. In
35 * which case, we return to the scheduler claiming StackOverflow, the
36 * scheduler will either increase the size of our stack, or raise
37 * an exception if the stack is already too big.
39 * The effect of checking for context switch only in the heap/stack check
40 * failure code is that we'll switch threads after the current thread has
41 * reached the end of its heap block. If a thread isn't allocating
42 * at all, it won't yield. Hopefully this won't be a problem in practice.
45 /* Remember that the return address is *removed* when returning to a
46 * ThreadRunGHC thread.
50 DEBUG_ONLY(foreign "C" heapCheckFail()); \
52 Hp = Hp - HpAlloc/*in bytes*/; \
53 if (HpAlloc <= BLOCK_SIZE \
54 && bdescr_link(CurrentNursery) != NULL) { \
56 CurrentNursery = bdescr_link(CurrentNursery); \
58 if (CInt[context_switch] != 0 :: CInt) { \
59 R1 = ThreadYielding; \
62 jump %ENTRY_CODE(Sp(0)); \
72 SAVE_THREAD_STATE(); \
73 StgTSO_what_next(CurrentTSO) = ThreadRunGHC::I16; \
76 #define RETURN_TO_SCHED(why,what_next) \
77 SAVE_THREAD_STATE(); \
78 StgTSO_what_next(CurrentTSO) = what_next::I16; \
/* Canned returns to the scheduler, built on RETURN_TO_SCHED (above):
 * the first argument is the status reported to the scheduler, the
 * second is the thread's what_next state for when it is resumed.
 */
/* heap check failed: ask the scheduler for a GC, resume as compiled code */
 82 #define HP_GENERIC RETURN_TO_SCHED(HeapOverflow, ThreadRunGHC)
/* voluntary yield: let other runnable threads go first, resume as compiled code */
 83 #define YIELD_GENERIC RETURN_TO_SCHED(ThreadYielding, ThreadRunGHC)
/* yield, resuming in the bytecode interpreter rather than compiled code */
 84 #define YIELD_TO_INTERPRETER RETURN_TO_SCHED(ThreadYielding, ThreadInterpret)
/* thread is blocked (e.g. on an MVar); resume as compiled code when unblocked */
 85 #define BLOCK_GENERIC RETURN_TO_SCHED(ThreadBlocked, ThreadRunGHC)
87 /* -----------------------------------------------------------------------------
88 Heap checks in thunks/functions.
90 In these cases, node always points to the function closure. This gives
91 us an easy way to return to the function: just leave R1 on the top of
92 the stack, and have the scheduler enter it to return.
94 There are canned sequences for 'n' pointer values in registers.
95 -------------------------------------------------------------------------- */
97 INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
108 Sp(0) = stg_enter_info;
113 stg_gc_enter_1_hponly
119 TSO_what_next(CurrentTSO) = ThreadRunGHC::I16;
126 ToDo: merge the block and yield macros, calling something like BLOCK(N)
131 Should we actually ever do a yield in such a case?? -- HWL
136 TSO_what_next(CurrentTSO) = ThreadRunGHC;
146 TSO_what_next(CurrentTSO) = ThreadRunGHC;
 151 /*- 2 Regs -------------------------------------------------------------------*/
159 TSO_what_next(CurrentTSO) = ThreadRunGHC;
164 /*- 3 Regs -------------------------------------------------------------------*/
173 TSO_what_next(CurrentTSO) = ThreadRunGHC;
178 /*- 4 Regs -------------------------------------------------------------------*/
188 TSO_what_next(CurrentTSO) = ThreadRunGHC;
193 /*- 5 Regs -------------------------------------------------------------------*/
204 TSO_what_next(CurrentTSO) = ThreadRunGHC;
209 /*- 6 Regs -------------------------------------------------------------------*/
221 TSO_what_next(CurrentTSO) = ThreadRunGHC;
226 /*- 7 Regs -------------------------------------------------------------------*/
239 TSO_what_next(CurrentTSO) = ThreadRunGHC;
244 /*- 8 Regs -------------------------------------------------------------------*/
258 TSO_what_next(CurrentTSO) = ThreadRunGHC;
263 // the same routines but with a block rather than a yield
270 TSO_what_next(CurrentTSO) = ThreadRunGHC;
 275 /*- 2 Regs -------------------------------------------------------------------*/
283 TSO_what_next(CurrentTSO) = ThreadRunGHC;
288 /*- 3 Regs -------------------------------------------------------------------*/
297 TSO_what_next(CurrentTSO) = ThreadRunGHC;
302 /*- 4 Regs -------------------------------------------------------------------*/
312 TSO_what_next(CurrentTSO) = ThreadRunGHC;
317 /*- 5 Regs -------------------------------------------------------------------*/
328 TSO_what_next(CurrentTSO) = ThreadRunGHC;
333 /*- 6 Regs -------------------------------------------------------------------*/
345 TSO_what_next(CurrentTSO) = ThreadRunGHC;
350 /*- 7 Regs -------------------------------------------------------------------*/
363 TSO_what_next(CurrentTSO) = ThreadRunGHC;
368 /*- 8 Regs -------------------------------------------------------------------*/
382 TSO_what_next(CurrentTSO) = ThreadRunGHC;
389 #if 0 && defined(PAR)
392 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
393 saving of the thread state from the actual jump via an StgReturn.
394 We need this separation because we call RTS routines in blocking entry codes
395 before jumping back into the RTS (see parallel/FetchMe.hc).
407 TSO_what_next(CurrentTSO) = ThreadRunGHC;
414 /* -----------------------------------------------------------------------------
415 Heap checks in Primitive case alternatives
417 A primitive case alternative is entered with a value either in
418 R1, FloatReg1 or D1 depending on the return convention. All the
419 cases are covered below.
420 -------------------------------------------------------------------------- */
422 /*-- No Registers live ------------------------------------------------------ */
429 /*-- void return ------------------------------------------------------------ */
431 INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL)
434 jump %ENTRY_CODE(Sp(0));
437 /*-- R1 is boxed/unpointed -------------------------------------------------- */
439 INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
443 jump %ENTRY_CODE(Sp(0));
450 Sp(0) = stg_gc_unpt_r1_info;
454 /*-- R1 is unboxed -------------------------------------------------- */
456 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
457 INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
461 jump %ENTRY_CODE(Sp(0));
468 Sp(0) = stg_gc_unbx_r1_info;
472 /*-- F1 contains a float ------------------------------------------------- */
474 INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
478 jump %ENTRY_CODE(Sp(0));
484 F_[Sp + WDS(1)] = F1;
485 Sp(0) = stg_gc_f1_info;
489 /*-- D1 contains a double ------------------------------------------------- */
491 /* we support doubles of either 1 or 2 words in size */
493 #if SIZEOF_DOUBLE == SIZEOF_VOID_P
494 # define DBL_BITMAP 1
497 # define DBL_BITMAP 3
501 INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL )
503 D1 = D_[Sp + WDS(1)];
504 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
505 jump %ENTRY_CODE(Sp(0));
510 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
511 D_[Sp + WDS(1)] = D1;
512 Sp(0) = stg_gc_d1_info;
517 /*-- L1 contains an int64 ------------------------------------------------- */
519 /* we support int64s of either 1 or 2 words in size */
521 #if SIZEOF_VOID_P == 8
522 # define LLI_BITMAP 1
525 # define LLI_BITMAP 3
529 INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL )
531 L1 = L_[Sp + WDS(1)];
532 Sp_adj(1) + SIZEOF_StgWord64;
533 jump %ENTRY_CODE(Sp(0));
538 Sp_adj(-1) - SIZEOF_StgWord64;
539 L_[Sp + WDS(1)] = L1;
540 Sp(0) = stg_gc_l1_info;
544 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
546 INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL )
549 // one ptr is on the stack (Sp(0))
550 jump %ENTRY_CODE(Sp(1));
553 /* -----------------------------------------------------------------------------
554 Generic function entry heap check code.
556 At a function entry point, the arguments are as per the calling convention,
557 i.e. some in regs and some on the stack. There may or may not be
558 a pointer to the function closure in R1 - if there isn't, then the heap
559 check failure code in the function will arrange to load it.
561 The function's argument types are described in its info table, so we
562 can just jump to this bit of generic code to save away all the
563 registers and return to the scheduler.
565 This code arranges the stack like this:
569 +---------------------+
571 +---------------------+
573 +---------------------+
575 +---------------------+
577 The size is the number of words of arguments on the stack, and is cached
578 in the frame in order to simplify stack walking: otherwise the size of
579 this stack frame would have to be calculated by looking at f's info table.
581 -------------------------------------------------------------------------- */
589 info = %GET_FUN_INFO(R1);
592 type = TO_W_(StgFunInfoExtra_fun_type(info));
593 if (type == ARG_GEN) {
594 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
596 if (type == ARG_GEN_BIG) {
597 #ifdef TABLES_NEXT_TO_CODE
598 // bitmap field holds an offset
599 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
600 + %GET_ENTRY(R1) /* ### */ );
602 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
605 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
610 // we don't have to save any registers away
614 Sp(0) = stg_gc_fun_info;
618 type = TO_W_(StgFunInfoExtra_fun_type(info));
620 if (type == ARG_GEN || type == ARG_GEN_BIG) {
621 // regs already saved by the heap check code
625 Sp(0) = stg_gc_fun_info;
626 // DEBUG_ONLY(foreign "C" fprintf(stderr, "stg_fun_gc_gen(ARG_GEN)"););
629 jump W_[stg_stack_save_entries + WDS(type)];
630 // jumps to stg_gc_noregs after saving stuff
632 #endif // !NO_ARG_REGS
635 /* -----------------------------------------------------------------------------
636 Generic Apply (return point)
638 The dual to stg_fun_gc_gen (above): this fragment returns to the
639 function, passing arguments in the stack and in registers
640 appropriately. The stack layout is given above.
641 -------------------------------------------------------------------------- */
643 INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN )
648 // Minor optimisation: there are no argument registers to load up,
649 // so we can just jump straight to the function's entry point.
655 info = %GET_FUN_INFO(R1);
656 type = TO_W_(StgFunInfoExtra_fun_type(info));
657 if (type == ARG_GEN || type == ARG_GEN_BIG) {
658 jump StgFunInfoExtra_slow_apply(info);
660 if (type == ARG_BCO) {
661 // cover this case just to be on the safe side
664 Sp(0) = stg_apply_interp_info;
665 jump stg_yield_to_interpreter;
667 jump W_[stg_ap_stack_entries + WDS(type)];
673 /* -----------------------------------------------------------------------------
674 Generic Heap Check Code.
676 Called with Liveness mask in R9, Return address in R10.
677 Stack must be consistent (containing all necessary info pointers
680 See StgMacros.h for a description of the RET_DYN stack frame.
682 We also define an stg_gen_yield here, because it's very similar.
683 -------------------------------------------------------------------------- */
685 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
686 // on a 64-bit machine, we'll end up wasting a couple of words, but
687 // it's not a big deal.
689 #define RESTORE_EVERYTHING \
690 L1 = L_[Sp + WDS(19)]; \
691 D2 = D_[Sp + WDS(17)]; \
692 D1 = D_[Sp + WDS(15)]; \
693 F4 = F_[Sp + WDS(14)]; \
694 F3 = F_[Sp + WDS(13)]; \
695 F2 = F_[Sp + WDS(12)]; \
696 F1 = F_[Sp + WDS(11)]; \
707 #define RET_OFFSET (-19)
709 #define SAVE_EVERYTHING \
711 L_[Sp + WDS(19)] = L1; \
712 D_[Sp + WDS(17)] = D2; \
713 D_[Sp + WDS(15)] = D1; \
714 F_[Sp + WDS(14)] = F4; \
715 F_[Sp + WDS(13)] = F3; \
716 F_[Sp + WDS(12)] = F2; \
717 F_[Sp + WDS(11)] = F1; \
726 Sp(2) = R10.w; /* return address */ \
727 Sp(1) = R9; /* liveness mask */ \
728 Sp(0) = stg_gc_gen_info;
730 INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN )
731 /* bitmap in the above info table is unused, the real one is on the stack. */
734 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
743 // A heap check at an unboxed tuple return point. The return address
744 // is on the stack, and we can find it by using the offsets given
745 // to us in the liveness mask.
748 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
754 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
755 * because we've just failed doYouWantToGC(), not a standard heap
756 * check. GC_GENERIC would end up returning StackOverflow.
764 /* -----------------------------------------------------------------------------
766 -------------------------------------------------------------------------- */
779 /* -----------------------------------------------------------------------------
780 Yielding to the interpreter... top of stack says what to do next.
781 -------------------------------------------------------------------------- */
783 stg_yield_to_interpreter
785 YIELD_TO_INTERPRETER;
788 /* -----------------------------------------------------------------------------
790 -------------------------------------------------------------------------- */
807 Sp(0) = stg_enter_info;
811 /* -----------------------------------------------------------------------------
812 * takeMVar/putMVar-specific blocks
814 * Stack layout for a thread blocked in takeMVar:
818 * stg_block_takemvar_info
820 * Stack layout for a thread blocked in putMVar:
825 * stg_block_putmvar_info
827 * See PrimOps.hc for a description of the workings of take/putMVar.
829 * -------------------------------------------------------------------------- */
831 INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL )
835 jump takeMVarzh_fast;
842 Sp(0) = stg_block_takemvar_info;
846 INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL )
859 Sp(0) = stg_block_putmvar_info;
863 #ifdef mingw32_HOST_OS
864 INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
869 ares = StgTSO_block_info(CurrentTSO);
870 len = StgAsyncIOResult_len(ares);
871 errC = StgAsyncIOResult_errCode(ares);
872 StgTSO_block_info(CurrentTSO) = NULL;
873 foreign "C" free(ares "ptr");
876 jump %ENTRY_CODE(Sp(1));
882 Sp(0) = stg_block_async_info;
886 /* Used by threadDelay implementation; it would be desirable to get rid of
887 * this free()'ing void return continuation.
889 INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
893 ares = StgTSO_block_info(CurrentTSO);
894 StgTSO_block_info(CurrentTSO) = NULL;
895 foreign "C" free(ares "ptr");
897 jump %ENTRY_CODE(Sp(0));
903 Sp(0) = stg_block_async_void_info;