/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"
/* Stack/Heap Check Failure
 * ------------------------
 *
 * On discovering that a stack or heap check has failed, we do the following:
 *
 *    - If the context_switch flag is set, indicating that there are more
 *      threads waiting to run, we yield to the scheduler
 *      (return ThreadYielding).
 *
 *    - If Hp > HpLim, we've had a heap check failure.  This means we've
 *      come to the end of the current heap block, so we try to chain
 *      another block on with ExtendNursery().
 *
 *        - If this succeeds, we carry on without returning to the
 *          scheduler.
 *
 *        - If it fails, we return to the scheduler claiming HeapOverflow
 *          so that a garbage collection can be performed.
 *
 *    - If Hp <= HpLim, it must have been a stack check that failed.  In
 *      which case we return to the scheduler claiming StackOverflow; the
 *      scheduler will either increase the size of our stack, or raise
 *      an exception if the stack is already too big.
 *
 * The effect of checking for context switch only in the heap/stack check
 * failure code is that we'll switch threads after the current thread has
 * reached the end of its heap block.  If a thread isn't allocating
 * at all, it won't yield.  Hopefully this won't be a problem in practice.
 */
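
/* For illustration only (a hedged sketch, not the code generator's exact
 * output): compiled code that allocates n words emits roughly the
 * following test before the allocation.  The canned entry point chosen
 * (here __stg_gc_enter_1, which expects the closure in R1) depends on
 * what is live at the check:
 *
 *     Hp = Hp + WDS(n);
 *     if (Hp > HpLim) {
 *         HpAlloc = WDS(n);        // GC_GENERIC below backs this out
 *         jump __stg_gc_enter_1;   // R1 = this closure; retry on return
 *     }
 */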
/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */
#define GC_GENERIC						\
    DEBUG_ONLY(foreign "C" heapCheckFail());			\
    if (Hp > HpLim) {						\
        Hp = Hp - HpAlloc/*in bytes*/;				\
        if (HpAlloc <= BLOCK_SIZE				\
            && bdescr_link(CurrentNursery) != NULL) {		\
            CurrentNursery = bdescr_link(CurrentNursery);	\
            OPEN_NURSERY();					\
            if (CInt[context_switch] != 0 :: CInt) {		\
                R1 = ThreadYielding;				\
                goto sched;					\
            } else {						\
                jump %ENTRY_CODE(Sp(0));			\
            }							\
        } else {						\
            R1 = HeapOverflow;					\
            goto sched;						\
        }							\
    } else {							\
        R1 = StackOverflow;					\
    }								\
  sched:							\
    StgTSO_what_next(CurrentTSO) = ThreadRunGHC::I16;		\
    jump stg_returnToSched;
#define PRE_RETURN(why,what_next)			\
    StgTSO_what_next(CurrentTSO) = what_next::I16;	\
    R1 = why;
#define HP_GENERIC				\
    PRE_RETURN(HeapOverflow, ThreadRunGHC)	\
    jump stg_returnToSched;
#define BLOCK_GENERIC				\
    PRE_RETURN(ThreadBlocked, ThreadRunGHC)	\
    jump stg_returnToSched;
#define YIELD_GENERIC				\
    PRE_RETURN(ThreadYielding, ThreadRunGHC)	\
    jump stg_returnToSched;
#define BLOCK_BUT_FIRST(c)			\
    PRE_RETURN(ThreadBlocked, ThreadRunGHC)	\
    R2 = c;					\
    jump stg_returnToSchedButFirst;
#define YIELD_TO_INTERPRETER			\
    PRE_RETURN(ThreadYielding, ThreadInterpret)	\
    jump stg_returnToSchedNotPaused;
/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */
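
/* Illustrative call site (an assumption about compiled code, not code
 * from this file): a closure's heap-check failure branch loads the
 * closure pointer into R1 and tail-calls the canned sequence, e.g.
 *
 *     gc:  HpAlloc = WDS(n);
 *          R1 = node;              // the function/thunk closure
 *          jump __stg_gc_enter_1;  // saves R1, returns to the scheduler
 */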
INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
{
    R1 = Sp(1);
    Sp_adj(2);
    ENTER();
}

__stg_gc_enter_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    GC_GENERIC
}
/* ToDo: merge the block and yield macros, calling something like BLOCK(N)
   at the end. */

/* Should we actually ever do a yield in such a case?? -- HWL */
    TSO_what_next(CurrentTSO) = ThreadRunGHC;

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 2 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 3 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 4 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 5 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 6 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 7 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 8 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

// the same routines but with a block rather than a yield

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 2 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 3 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 4 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 5 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 6 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 7 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

/*- 8 Regs -------------------------------------------------------------------*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;
#if 0 && defined(PAR)

/*
  Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
  saving of the thread state from the actual jump via an StgReturn.
  We need this separation because we call RTS routines in blocking entry codes
  before jumping back into the RTS (see parallel/FetchMe.hc).
*/

    TSO_what_next(CurrentTSO) = ThreadRunGHC;

#endif
/* -----------------------------------------------------------------------------
   Heap checks in Primitive case alternatives

   A primitive case alternative is entered with a value either in
   R1, FloatReg1 or D1 depending on the return convention.  All the
   cases are covered below.
   -------------------------------------------------------------------------- */
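
/* For orientation (a sketch, not code from this file): a case that
 * scrutinises a Double# gets its result back in D1, so the heap check
 * in the alternative saves D1 with stg_gc_d1 below:
 *
 *     Hp = Hp + WDS(n);
 *     if (Hp > HpLim) { HpAlloc = WDS(n); jump stg_gc_d1; }   // D1 live
 *     ... use D1 ...
 */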
/*-- No Registers live ------------------------------------------------------ */

stg_gc_noregs
{
    GC_GENERIC
}
/*-- void return ------------------------------------------------------------ */

INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL)
{
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}
/*-- R1 is boxed/unpointed -------------------------------------------------- */

INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unpt_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unpt_r1_info;
    GC_GENERIC
}
/*-- R1 is unboxed ----------------------------------------------------------- */

/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unbx_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unbx_r1_info;
    GC_GENERIC
}
/*-- F1 contains a float ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
{
    F1 = F_[Sp + WDS(1)];
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_f1
{
    Sp_adj(-2);
    F_[Sp + WDS(1)] = F1;
    Sp(0) = stg_gc_f1_info;
    GC_GENERIC
}
/*-- D1 contains a double ------------------------------------------------- */

/* we support doubles of either 1 or 2 words in size */

#if SIZEOF_DOUBLE == SIZEOF_VOID_P
# define DBL_BITMAP 1
# define DBL_WORDS  1
#else
# define DBL_BITMAP 3
# define DBL_WORDS  2
#endif

INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL )
{
    D1 = D_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgDouble;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_d1
{
    Sp = Sp - WDS(1) - SIZEOF_StgDouble;
    D_[Sp + WDS(1)] = D1;
    Sp(0) = stg_gc_d1_info;
    GC_GENERIC
}
/*-- L1 contains an int64 ------------------------------------------------- */

/* we support int64s of either 1 or 2 words in size */

#if SIZEOF_VOID_P == 8
# define LLI_BITMAP 1
# define LLI_WORDS  1
#else
# define LLI_BITMAP 3
# define LLI_WORDS  2
#endif

INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL )
{
    L1 = L_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgWord64;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_l1
{
    Sp = Sp - WDS(1) - SIZEOF_StgWord64;
    L_[Sp + WDS(1)] = L1;
    Sp(0) = stg_gc_l1_info;
    GC_GENERIC
}
/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */

INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL )
{
    /* one ptr is on the stack (Sp(0)) */
    jump %ENTRY_CODE(Sp(1));
}
/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be
   a pointer to the function closure in R1 -- if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.

   This code arranges the stack like this:

         |        ....         |
         +---------------------+
         |         fun         |
         +---------------------+
         |        size         |
         +---------------------+
         |   stg_gc_fun_info   |
         +---------------------+

   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */
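
/* Worked example (hypothetical, for illustration): for a function f
 * with three stack arguments, after __stg_gc_fun below runs the frame
 * looks like
 *
 *     Sp(0)    = stg_gc_fun_info
 *     Sp(1)    = 3               // size: words of args on the stack
 *     Sp(2)    = f's closure     // fun
 *     Sp(3..5) = the three args
 */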
__stg_gc_fun
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(R1);

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
        size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else {
        if (type == ARG_GEN_BIG) {
#ifdef TABLES_NEXT_TO_CODE
            // bitmap field holds an offset
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
                                        + %GET_ENTRY(R1) /* ### */ );
#else
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
        } else {
            size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
        }
    }

#ifdef NO_ARG_REGS
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    GC_GENERIC
#else
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        GC_GENERIC
    } else {
        jump W_[stg_stack_save_entries + WDS(type)];
        // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}
/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to stg_fun_gc_gen (above): this fragment returns to the
   function, passing arguments in the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */
INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN )
{
    R1 = Sp(2);
    Sp_adj(3);
#ifdef NO_ARG_REGS
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(R1);
#else
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(R1);
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        jump StgFunInfoExtra_slow_apply(info);
    } else {
        if (type == ARG_BCO) {
            // cover this case just to be on the safe side
            Sp_adj(-2);
            Sp(1) = R1;
            Sp(0) = stg_apply_interp_info;
            jump stg_yield_to_interpreter;
        } else {
            jump W_[stg_ap_stack_entries + WDS(type)];
        }
    }
#endif
}
/* -----------------------------------------------------------------------------
   Generic Heap Check Code.

   Called with Liveness mask in R9, Return address in R10.
   Stack must be consistent (containing all necessary info pointers
   to relevant SRTs).

   See StgMacros.h for a description of the RET_DYN stack frame.

   We also define an stg_gen_yield here, because it's very similar.
   -------------------------------------------------------------------------- */
// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P;
// on a 64-bit machine we'll end up wasting a couple of words, but
// it's not a big deal.
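
// The slot assignments below imply the following frame layout (offsets
// in words from Sp; the gaps after WDS(15) and WDS(17) are the second
// halves of the two doubles under the 2*SIZEOF_VOID_P assumption above):
//
//     Sp(0)          stg_gc_gen_info
//     Sp(1)          R9  (liveness mask)
//     Sp(2)          R10 (return address)
//     Sp(3)-Sp(10)   R1-R8
//     WDS(11)-WDS(14) F1-F4
//     WDS(15)        D1,   WDS(17)  D2
//     WDS(19)        L1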
#define RESTORE_EVERYTHING			\
    L1 = L_[Sp + WDS(19)];			\
    D2 = D_[Sp + WDS(17)];			\
    D1 = D_[Sp + WDS(15)];			\
    F4 = F_[Sp + WDS(14)];			\
    F3 = F_[Sp + WDS(13)];			\
    F2 = F_[Sp + WDS(12)];			\
    F1 = F_[Sp + WDS(11)];			\
    R8 = Sp(10);				\
    R7 = Sp(9);					\
    R6 = Sp(8);					\
    R5 = Sp(7);					\
    R4 = Sp(6);					\
    R3 = Sp(5);					\
    R2 = Sp(4);					\
    R1 = Sp(3);					\
    Sp_adj(21);

#define RET_OFFSET (-19)
#define SAVE_EVERYTHING				\
    Sp_adj(-21);				\
    L_[Sp + WDS(19)] = L1;			\
    D_[Sp + WDS(17)] = D2;			\
    D_[Sp + WDS(15)] = D1;			\
    F_[Sp + WDS(14)] = F4;			\
    F_[Sp + WDS(13)] = F3;			\
    F_[Sp + WDS(12)] = F2;			\
    F_[Sp + WDS(11)] = F1;			\
    Sp(10) = R8;				\
    Sp(9) = R7;					\
    Sp(8) = R6;					\
    Sp(7) = R5;					\
    Sp(6) = R4;					\
    Sp(5) = R3;					\
    Sp(4) = R2;					\
    Sp(3) = R1;					\
    Sp(2) = R10;    /* return address */	\
    Sp(1) = R9;     /* liveness mask */	\
    Sp(0) = stg_gc_gen_info;
INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN )
/* bitmap in the above info table is unused, the real one is on the stack. */
{
    RESTORE_EVERYTHING;
    jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
}

stg_gc_gen
{
    SAVE_EVERYTHING;
    GC_GENERIC
}
// A heap check at an unboxed tuple return point.  The return address
// is on the stack, and we can find it by using the offsets given
// to us in the liveness mask.
stg_gc_ut
{
    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
    GC_GENERIC
}
/*
 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
 * because we've just failed doYouWantToGC(), not a standard heap
 * check.  GC_GENERIC would end up returning StackOverflow.
 */
stg_gc_gen_hp
{
    SAVE_EVERYTHING;
    HP_GENERIC
}
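
/* Call-site sketch (an assumption about MAYBE_GC's shape in Cmm.h,
 * shown for orientation only): a primop that may need a GC does roughly
 *
 *     if (doYouWantToGC()) {
 *         R9  = liveness;       // liveness mask for the RET_DYN frame
 *         R10 = reentry;        // address to resume at after the GC
 *         jump stg_gc_gen_hp;   // claims HeapOverflow, never StackOverflow
 *     }
 */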
/* -----------------------------------------------------------------------------
   Yields.
   -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}
/* -----------------------------------------------------------------------------
   Blocks.
   -------------------------------------------------------------------------- */

stg_block_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}
/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar:
 *
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info
 *
 * Stack layout for a thread blocked in putMVar:
 *
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.hc for a description of the workings of take/putMVar.
 *
 * -------------------------------------------------------------------------- */
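
/* Illustrative call site (a sketch assuming the shape of
 * takeMVarzh_fast in PrimOps, not code from this file): when the MVar
 * is empty, the primop enqueues the TSO and jumps here to block:
 *
 *     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
 *         ... add CurrentTSO to the MVar's blocked queue ...
 *         R1 = mvar;
 *         jump stg_block_takemvar;
 *     }
 */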
INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump takeMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
    foreign "C" unlockClosure(R3 "ptr", stg_EMPTY_MVAR_info);
    jump StgReturn;
}

stg_block_takemvar
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1; // the MVar, for the finally fragment
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}
INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump putMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
    foreign "C" unlockClosure(R3 "ptr", stg_FULL_MVAR_info);
    jump StgReturn;
}

stg_block_putmvar
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_putmvar_info;
    R3 = R1; // the MVar, for the finally fragment
    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
}
#ifdef mingw32_HOST_OS
INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
{
    W_ ares;
    W_ len, errC;

    ares = StgTSO_block_info(CurrentTSO);
    len = StgAsyncIOResult_len(ares);
    errC = StgAsyncIOResult_errCode(ares);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    R1 = len;      // return the length and error code to the caller
    Sp(0) = errC;
    jump %ENTRY_CODE(Sp(1));
}

stg_block_async
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}
/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
{
    W_ ares;

    ares = StgTSO_block_info(CurrentTSO);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

stg_block_async_void
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif