1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
15 /* Stack/Heap Check Failure
16 * ------------------------
18 * On discovering that a stack or heap check has failed, we do the following:
20 * - If the context_switch flag is set, indicating that there are more
21 * threads waiting to run, we yield to the scheduler
22 * (return ThreadYielding).
24 * - If Hp > HpLim, we've had a heap check failure. This means we've
25 * come to the end of the current heap block, so we try to chain
26 * another block on with ExtendNursery().
28 * - If this succeeds, we carry on without returning to the scheduler.
31 * - If it fails, we return to the scheduler claiming HeapOverflow
32 * so that a garbage collection can be performed.
34 * - If Hp <= HpLim, it must have been a stack check that failed. In
35 * which case, we return to the scheduler claiming StackOverflow, the
36 * scheduler will either increase the size of our stack, or raise
37 * an exception if the stack is already too big.
39 * The effect of checking for context switch only in the heap/stack check
40 * failure code is that we'll switch threads after the current thread has
41 * reached the end of its heap block. If a thread isn't allocating
42 * at all, it won't yield. Hopefully this won't be a problem in practice.
45 /* Remember that the return address is *removed* when returning to a
46 * ThreadRunGHC thread.
50 DEBUG_ONLY(foreign "C" heapCheckFail()); \
52 Hp = Hp - HpAlloc/*in bytes*/; \
53 if (HpAlloc <= BLOCK_SIZE \
54 && bdescr_link(CurrentNursery) != NULL) { \
56 CurrentNursery = bdescr_link(CurrentNursery); \
58 if (CInt[context_switch] != 0 :: CInt) { \
59 R1 = ThreadYielding; \
62 jump %ENTRY_CODE(Sp(0)); \
72 StgTSO_what_next(CurrentTSO) = ThreadRunGHC::I16; \
73 jump stg_returnToSched;
75 #define RETURN_TO_SCHED(why,what_next) \
76 StgTSO_what_next(CurrentTSO) = what_next::I16; \
78 jump stg_returnToSched;
80 #define RETURN_TO_SCHED_BUT_FIRST(why,what_next,cont) \
81 StgTSO_what_next(CurrentTSO) = what_next::I16; \
84 jump stg_returnToSchedButFirst;
/* Canned returns to the scheduler.  Each macro records why the thread is
 * coming back to the RTS (heap overflow, voluntary yield, or block) and
 * what state to resume it in (ThreadRunGHC = compiled code,
 * ThreadInterpret = the byte-code interpreter), then jumps to
 * stg_returnToSched via RETURN_TO_SCHED.  BLOCK_BUT_FIRST(c) additionally
 * runs the continuation c (e.g. an MVar unlock) just before entering the
 * scheduler, via stg_returnToSchedButFirst.
 */
86 #define HP_GENERIC RETURN_TO_SCHED(HeapOverflow, ThreadRunGHC)
87 #define YIELD_GENERIC RETURN_TO_SCHED(ThreadYielding, ThreadRunGHC)
88 #define YIELD_TO_INTERPRETER RETURN_TO_SCHED(ThreadYielding, ThreadInterpret)
89 #define BLOCK_GENERIC RETURN_TO_SCHED(ThreadBlocked, ThreadRunGHC)
90 #define BLOCK_BUT_FIRST(c) RETURN_TO_SCHED_BUT_FIRST(ThreadBlocked, ThreadRunGHC, c)
92 /* -----------------------------------------------------------------------------
93 Heap checks in thunks/functions.
95 In these cases, node always points to the function closure. This gives
96 us an easy way to return to the function: just leave R1 on the top of
97 the stack, and have the scheduler enter it to return.
99 There are canned sequences for 'n' pointer values in registers.
100 -------------------------------------------------------------------------- */
102 INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
113 Sp(0) = stg_enter_info;
119 ToDo: merge the block and yield macros, calling something like BLOCK(N)
124 Should we actually ever do a yield in such a case?? -- HWL
129 TSO_what_next(CurrentTSO) = ThreadRunGHC;
139 TSO_what_next(CurrentTSO) = ThreadRunGHC;
144 /*- 2 Regs--------------------------------------------------------------------*/
152 TSO_what_next(CurrentTSO) = ThreadRunGHC;
157 /*- 3 Regs -------------------------------------------------------------------*/
166 TSO_what_next(CurrentTSO) = ThreadRunGHC;
171 /*- 4 Regs -------------------------------------------------------------------*/
181 TSO_what_next(CurrentTSO) = ThreadRunGHC;
186 /*- 5 Regs -------------------------------------------------------------------*/
197 TSO_what_next(CurrentTSO) = ThreadRunGHC;
202 /*- 6 Regs -------------------------------------------------------------------*/
214 TSO_what_next(CurrentTSO) = ThreadRunGHC;
219 /*- 7 Regs -------------------------------------------------------------------*/
232 TSO_what_next(CurrentTSO) = ThreadRunGHC;
237 /*- 8 Regs -------------------------------------------------------------------*/
251 TSO_what_next(CurrentTSO) = ThreadRunGHC;
256 // the same routines but with a block rather than a yield
263 TSO_what_next(CurrentTSO) = ThreadRunGHC;
268 /*- 2 Regs--------------------------------------------------------------------*/
276 TSO_what_next(CurrentTSO) = ThreadRunGHC;
281 /*- 3 Regs -------------------------------------------------------------------*/
290 TSO_what_next(CurrentTSO) = ThreadRunGHC;
295 /*- 4 Regs -------------------------------------------------------------------*/
305 TSO_what_next(CurrentTSO) = ThreadRunGHC;
310 /*- 5 Regs -------------------------------------------------------------------*/
321 TSO_what_next(CurrentTSO) = ThreadRunGHC;
326 /*- 6 Regs -------------------------------------------------------------------*/
338 TSO_what_next(CurrentTSO) = ThreadRunGHC;
343 /*- 7 Regs -------------------------------------------------------------------*/
356 TSO_what_next(CurrentTSO) = ThreadRunGHC;
361 /*- 8 Regs -------------------------------------------------------------------*/
375 TSO_what_next(CurrentTSO) = ThreadRunGHC;
382 #if 0 && defined(PAR)
385 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
386 saving of the thread state from the actual jump via an StgReturn.
387 We need this separation because we call RTS routines in blocking entry codes
388 before jumping back into the RTS (see parallel/FetchMe.hc).
400 TSO_what_next(CurrentTSO) = ThreadRunGHC;
407 /* -----------------------------------------------------------------------------
408 Heap checks in Primitive case alternatives
410 A primitive case alternative is entered with a value either in
411 R1, FloatReg1 or D1 depending on the return convention. All the
412 cases are covered below.
413 -------------------------------------------------------------------------- */
415 /*-- No Registers live ------------------------------------------------------ */
422 /*-- void return ------------------------------------------------------------ */
424 INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL)
427 jump %ENTRY_CODE(Sp(0));
430 /*-- R1 is boxed/unpointed -------------------------------------------------- */
432 INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
436 jump %ENTRY_CODE(Sp(0));
443 Sp(0) = stg_gc_unpt_r1_info;
447 /*-- R1 is unboxed -------------------------------------------------- */
449 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
450 INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
454 jump %ENTRY_CODE(Sp(0));
461 Sp(0) = stg_gc_unbx_r1_info;
465 /*-- F1 contains a float ------------------------------------------------- */
467 INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
471 jump %ENTRY_CODE(Sp(0));
477 F_[Sp + WDS(1)] = F1;
478 Sp(0) = stg_gc_f1_info;
482 /*-- D1 contains a double ------------------------------------------------- */
484 /* we support doubles of either 1 or 2 words in size */
486 #if SIZEOF_DOUBLE == SIZEOF_VOID_P
487 # define DBL_BITMAP 1
490 # define DBL_BITMAP 3
494 INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL )
496 D1 = D_[Sp + WDS(1)];
497 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
498 jump %ENTRY_CODE(Sp(0));
503 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
504 D_[Sp + WDS(1)] = D1;
505 Sp(0) = stg_gc_d1_info;
510 /*-- L1 contains an int64 ------------------------------------------------- */
512 /* we support int64s of either 1 or 2 words in size */
514 #if SIZEOF_VOID_P == 8
515 # define LLI_BITMAP 1
518 # define LLI_BITMAP 3
522 INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL )
524 L1 = L_[Sp + WDS(1)];
525 Sp_adj(1) + SIZEOF_StgWord64;
526 jump %ENTRY_CODE(Sp(0));
531 Sp_adj(-1) - SIZEOF_StgWord64;
532 L_[Sp + WDS(1)] = L1;
533 Sp(0) = stg_gc_l1_info;
537 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
539 INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL )
542 // one ptr is on the stack (Sp(0))
543 jump %ENTRY_CODE(Sp(1));
546 /* -----------------------------------------------------------------------------
547 Generic function entry heap check code.
549 At a function entry point, the arguments are as per the calling convention,
550 i.e. some in regs and some on the stack. There may or may not be
551 a pointer to the function closure in R1 - if there isn't, then the heap
552 check failure code in the function will arrange to load it.
554 The function's argument types are described in its info table, so we
555 can just jump to this bit of generic code to save away all the
556 registers and return to the scheduler.
558 This code arranges the stack like this:
562 +---------------------+
564 +---------------------+
566 +---------------------+
568 +---------------------+
570 The size is the number of words of arguments on the stack, and is cached
571 in the frame in order to simplify stack walking: otherwise the size of
572 this stack frame would have to be calculated by looking at f's info table.
574 -------------------------------------------------------------------------- */
582 info = %GET_FUN_INFO(R1);
585 type = TO_W_(StgFunInfoExtra_fun_type(info));
586 if (type == ARG_GEN) {
587 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
589 if (type == ARG_GEN_BIG) {
590 #ifdef TABLES_NEXT_TO_CODE
591 // bitmap field holds an offset
592 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
593 + %GET_ENTRY(R1) /* ### */ );
595 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
598 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
603 // we don't have to save any registers away
607 Sp(0) = stg_gc_fun_info;
611 type = TO_W_(StgFunInfoExtra_fun_type(info));
613 if (type == ARG_GEN || type == ARG_GEN_BIG) {
614 // regs already saved by the heap check code
618 Sp(0) = stg_gc_fun_info;
619 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
622 jump W_[stg_stack_save_entries + WDS(type)];
623 // jumps to stg_gc_noregs after saving stuff
625 #endif /* !NO_ARG_REGS */
628 /* -----------------------------------------------------------------------------
629 Generic Apply (return point)
631 The dual to stg_fun_gc_gen (above): this fragment returns to the
632 function, passing arguments in the stack and in registers
633 appropriately. The stack layout is given above.
634 -------------------------------------------------------------------------- */
636 INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN )
641 // Minor optimisation: there are no argument registers to load up,
642 // so we can just jump straight to the function's entry point.
648 info = %GET_FUN_INFO(R1);
649 type = TO_W_(StgFunInfoExtra_fun_type(info));
650 if (type == ARG_GEN || type == ARG_GEN_BIG) {
651 jump StgFunInfoExtra_slow_apply(info);
653 if (type == ARG_BCO) {
654 // cover this case just to be on the safe side
657 Sp(0) = stg_apply_interp_info;
658 jump stg_yield_to_interpreter;
660 jump W_[stg_ap_stack_entries + WDS(type)];
666 /* -----------------------------------------------------------------------------
667 Generic Heap Check Code.
669 Called with Liveness mask in R9, Return address in R10.
670 Stack must be consistent (containing all necessary info pointers to relevant SRTs).
673 See StgMacros.h for a description of the RET_DYN stack frame.
675 We also define an stg_gen_yield here, because it's very similar.
676 -------------------------------------------------------------------------- */
678 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
679 // on a 64-bit machine, we'll end up wasting a couple of words, but
680 // it's not a big deal.
682 #define RESTORE_EVERYTHING \
683 L1 = L_[Sp + WDS(19)]; \
684 D2 = D_[Sp + WDS(17)]; \
685 D1 = D_[Sp + WDS(15)]; \
686 F4 = F_[Sp + WDS(14)]; \
687 F3 = F_[Sp + WDS(13)]; \
688 F2 = F_[Sp + WDS(12)]; \
689 F1 = F_[Sp + WDS(11)]; \
700 #define RET_OFFSET (-19)
702 #define SAVE_EVERYTHING \
704 L_[Sp + WDS(19)] = L1; \
705 D_[Sp + WDS(17)] = D2; \
706 D_[Sp + WDS(15)] = D1; \
707 F_[Sp + WDS(14)] = F4; \
708 F_[Sp + WDS(13)] = F3; \
709 F_[Sp + WDS(12)] = F2; \
710 F_[Sp + WDS(11)] = F1; \
719 Sp(2) = R10; /* return address */ \
720 Sp(1) = R9; /* liveness mask */ \
721 Sp(0) = stg_gc_gen_info;
723 INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN )
724 /* bitmap in the above info table is unused, the real one is on the stack. */
727 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
736 // A heap check at an unboxed tuple return point. The return address
737 // is on the stack, and we can find it by using the offsets given
738 // to us in the liveness mask.
741 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
747 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
748 * because we've just failed doYouWantToGC(), not a standard heap
749 * check. GC_GENERIC would end up returning StackOverflow.
757 /* -----------------------------------------------------------------------------
759 -------------------------------------------------------------------------- */
772 /* -----------------------------------------------------------------------------
773 Yielding to the interpreter... top of stack says what to do next.
774 -------------------------------------------------------------------------- */
776 stg_yield_to_interpreter
778 YIELD_TO_INTERPRETER;
781 /* -----------------------------------------------------------------------------
783 -------------------------------------------------------------------------- */
800 Sp(0) = stg_enter_info;
804 /* -----------------------------------------------------------------------------
805 * takeMVar/putMVar-specific blocks
807 * Stack layout for a thread blocked in takeMVar:
811 * stg_block_takemvar_info
813 * Stack layout for a thread blocked in putMVar:
818 * stg_block_putmvar_info
820 * See PrimOps.hc for a description of the workings of take/putMVar.
822 * -------------------------------------------------------------------------- */
824 INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL )
828 jump takeMVarzh_fast;
831 // code fragment executed just before we return to the scheduler
832 stg_block_takemvar_finally
835 foreign "C" unlockClosure(R3 "ptr", stg_EMPTY_MVAR_info);
844 Sp(0) = stg_block_takemvar_info;
846 BLOCK_BUT_FIRST(stg_block_takemvar_finally);
849 INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL )
857 // code fragment executed just before we return to the scheduler
858 stg_block_putmvar_finally
861 foreign "C" unlockClosure(R3 "ptr", stg_FULL_MVAR_info);
871 Sp(0) = stg_block_putmvar_info;
873 BLOCK_BUT_FIRST(stg_block_putmvar_finally);
876 #ifdef mingw32_HOST_OS
877 INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
/* Windows-only return point for a thread that was blocked on an
 * asynchronous I/O request.  The completed request's StgAsyncIOResult
 * record is hung off the TSO's block_info field; we unpack its len and
 * errCode fields, free the record, and return to the frame underneath.
 * NOTE(review): how len/errC are passed to the continuation is on lines
 * not visible in this chunk — presumably via return registers; confirm
 * against the full file.
 */
882 ares = StgTSO_block_info(CurrentTSO);
883 len = StgAsyncIOResult_len(ares);
884 errC = StgAsyncIOResult_errCode(ares);
885 StgTSO_block_info(CurrentTSO) = NULL; /* drop the reference before freeing the record */
886 foreign "C" free(ares "ptr");
889 jump %ENTRY_CODE(Sp(1));
/* Fragment of the stg_block_async entry wrapper: pushes this return frame
 * before the thread is handed back to the scheduler. */
895 Sp(0) = stg_block_async_info;
899 /* Used by threadDelay implementation; it would be desirable to get rid of
900 * this free()'ing void return continuation.
902 INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
/* Like stg_block_async, but the async result carries no useful payload
 * (used by threadDelay — see the comment above): just free the
 * StgAsyncIOResult record hung off the TSO and return void to the frame
 * below this one.
 */
906 ares = StgTSO_block_info(CurrentTSO);
907 StgTSO_block_info(CurrentTSO) = NULL; /* drop the reference before freeing the record */
908 foreign "C" free(ares "ptr");
910 jump %ENTRY_CODE(Sp(0));
/* Fragment of the stg_block_async_void entry wrapper: pushes this
 * return frame. */
916 Sp(0) = stg_block_async_void_info;