1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 1998-2004
5 * Canned Heap-Check and Stack-Check sequences.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * ---------------------------------------------------------------------------*/
15 import pthread_mutex_unlock;
17 /* Stack/Heap Check Failure
18 * ------------------------
20 * On discovering that a stack or heap check has failed, we do the following:
22 * - If the context_switch flag is set, indicating that there are more
23 * threads waiting to run, we yield to the scheduler
24 * (return ThreadYielding).
26 * - If Hp > HpLim, we've had a heap check failure. This means we've
27 * come to the end of the current heap block, so we try to chain
28 * another block on with ExtendNursery().
30 * - If this succeeds, we carry on without returning to the scheduler.
33 * - If it fails, we return to the scheduler claiming HeapOverflow
34 * so that a garbage collection can be performed.
36 * - If Hp <= HpLim, it must have been a stack check that failed. In
37 * which case, we return to the scheduler claiming StackOverflow, the
38 * scheduler will either increase the size of our stack, or raise
39 * an exception if the stack is already too big.
41 * The effect of checking for context switch only in the heap/stack check
42 * failure code is that we'll switch threads after the current thread has
43 * reached the end of its heap block. If a thread isn't allocating
44 * at all, it won't yield. Hopefully this won't be a problem in practice.
/* Common prologue for every return-to-scheduler sequence below: record
 * in the current TSO how the thread should be resumed (what_next) and
 * stash the reason for returning (why) in the register table, ready for
 * the jump to one of the stg_returnToSched* entry points.
 * NOTE(review): in upstream GHC HeapStackCheck.cmm this macro has one
 * more continuation line, "R1 = BaseReg;" -- it appears to be missing
 * from this copy (the line 49 continuation backslash dangles). Confirm
 * against the original file. */
47 #define PRE_RETURN(why,what_next) \
48 StgTSO_what_next(CurrentTSO) = what_next::I16; \
49 StgRegTable_rRet(BaseReg) = why; \
52 /* Remember that the return address is *removed* when returning to a
53 * ThreadRunGHC thread.
57 DEBUG_ONLY(foreign "C" heapCheckFail()); \
59 Hp = Hp - HpAlloc/*in bytes*/; \
60 if (HpAlloc <= BLOCK_SIZE \
61 && bdescr_link(CurrentNursery) != NULL) { \
63 CurrentNursery = bdescr_link(CurrentNursery); \
65 if (CInt[context_switch] != 0 :: CInt) { \
66 R1 = ThreadYielding; \
69 jump %ENTRY_CODE(Sp(0)); \
79 PRE_RETURN(R1,ThreadRunGHC); \
80 jump stg_returnToSched;
83 PRE_RETURN(HeapOverflow, ThreadRunGHC) \
84 jump stg_returnToSched;
/* Return to the scheduler with reason ThreadBlocked: the current thread
 * is blocked (e.g. on an MVar or black hole) and must not be put back on
 * the run queue until something unblocks it. Resumes as ThreadRunGHC,
 * i.e. ordinary compiled code. */
86 #define BLOCK_GENERIC \
87 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
88 jump stg_returnToSched;
/* Voluntarily return to the scheduler with reason ThreadYielding: the
 * thread remains runnable and may be rescheduled immediately. Resumes
 * as ThreadRunGHC, i.e. ordinary compiled code. */
90 #define YIELD_GENERIC \
91 PRE_RETURN(ThreadYielding, ThreadRunGHC) \
92 jump stg_returnToSched;
/* Like BLOCK_GENERIC, but jumps to stg_returnToSchedButFirst so that the
 * code fragment c is run just before the thread is descheduled -- used
 * by the stg_block_* sequences below to e.g. unlock a closure or release
 * a lock after the thread state has been saved.
 * NOTE(review): the visible body never uses the parameter c; upstream
 * GHC HeapStackCheck.cmm passes it via an "R2 = c;" continuation line
 * before the jump, which appears to be missing from this copy (original
 * line 96 is absent). Confirm against the original file. */
94 #define BLOCK_BUT_FIRST(c) \
95 PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
97 jump stg_returnToSchedButFirst;
/* Return to the scheduler with reason ThreadYielding, but mark the
 * thread ThreadInterpret so it resumes in the byte-code interpreter
 * rather than compiled code. Jumps to stg_returnToSchedNotPaused
 * (the "NotPaused" variant of the return entry point). */
99 #define YIELD_TO_INTERPRETER \
100 PRE_RETURN(ThreadYielding, ThreadInterpret) \
101 jump stg_returnToSchedNotPaused;
103 /* -----------------------------------------------------------------------------
104 Heap checks in thunks/functions.
106 In these cases, node always points to the function closure. This gives
107 us an easy way to return to the function: just leave R1 on the top of
108 the stack, and have the scheduler enter it to return.
110 There are canned sequences for 'n' pointer values in registers.
111 -------------------------------------------------------------------------- */
113 INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused)
124 Sp(0) = stg_enter_info;
130 ToDo: merge the block and yield macros, calling something like BLOCK(N)
135 Should we actually ever do a yield in such a case?? -- HWL
140 TSO_what_next(CurrentTSO) = ThreadRunGHC;
150 TSO_what_next(CurrentTSO) = ThreadRunGHC;
155 /*- 2 Regs -------------------------------------------------------------------*/
163 TSO_what_next(CurrentTSO) = ThreadRunGHC;
168 /*- 3 Regs -------------------------------------------------------------------*/
177 TSO_what_next(CurrentTSO) = ThreadRunGHC;
182 /*- 4 Regs -------------------------------------------------------------------*/
192 TSO_what_next(CurrentTSO) = ThreadRunGHC;
197 /*- 5 Regs -------------------------------------------------------------------*/
208 TSO_what_next(CurrentTSO) = ThreadRunGHC;
213 /*- 6 Regs -------------------------------------------------------------------*/
225 TSO_what_next(CurrentTSO) = ThreadRunGHC;
230 /*- 7 Regs -------------------------------------------------------------------*/
243 TSO_what_next(CurrentTSO) = ThreadRunGHC;
248 /*- 8 Regs -------------------------------------------------------------------*/
262 TSO_what_next(CurrentTSO) = ThreadRunGHC;
267 // the same routines but with a block rather than a yield
274 TSO_what_next(CurrentTSO) = ThreadRunGHC;
279 /*- 2 Regs -------------------------------------------------------------------*/
287 TSO_what_next(CurrentTSO) = ThreadRunGHC;
292 /*- 3 Regs -------------------------------------------------------------------*/
301 TSO_what_next(CurrentTSO) = ThreadRunGHC;
306 /*- 4 Regs -------------------------------------------------------------------*/
316 TSO_what_next(CurrentTSO) = ThreadRunGHC;
321 /*- 5 Regs -------------------------------------------------------------------*/
332 TSO_what_next(CurrentTSO) = ThreadRunGHC;
337 /*- 6 Regs -------------------------------------------------------------------*/
349 TSO_what_next(CurrentTSO) = ThreadRunGHC;
354 /*- 7 Regs -------------------------------------------------------------------*/
367 TSO_what_next(CurrentTSO) = ThreadRunGHC;
372 /*- 8 Regs -------------------------------------------------------------------*/
386 TSO_what_next(CurrentTSO) = ThreadRunGHC;
393 #if 0 && defined(PAR)
396 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
397 saving of the thread state from the actual jump via an StgReturn.
398 We need this separation because we call RTS routines in blocking entry codes
399 before jumping back into the RTS (see parallel/FetchMe.hc).
411 TSO_what_next(CurrentTSO) = ThreadRunGHC;
418 /* -----------------------------------------------------------------------------
419 Heap checks in Primitive case alternatives
421 A primitive case alternative is entered with a value either in
422 R1, FloatReg1 or D1 depending on the return convention. All the
423 cases are covered below.
424 -------------------------------------------------------------------------- */
426 /*-- No Registers live ------------------------------------------------------ */
433 /*-- void return ------------------------------------------------------------ */
435 INFO_TABLE_RET( stg_gc_void, RET_SMALL)
438 jump %ENTRY_CODE(Sp(0));
441 /*-- R1 is boxed/unpointed -------------------------------------------------- */
443 INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused)
447 jump %ENTRY_CODE(Sp(0));
454 Sp(0) = stg_gc_unpt_r1_info;
458 /*-- R1 is unboxed -------------------------------------------------- */
460 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
461 INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
465 jump %ENTRY_CODE(Sp(0));
472 Sp(0) = stg_gc_unbx_r1_info;
476 /*-- F1 contains a float ------------------------------------------------- */
478 INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
482 jump %ENTRY_CODE(Sp(0));
488 F_[Sp + WDS(1)] = F1;
489 Sp(0) = stg_gc_f1_info;
493 /*-- D1 contains a double ------------------------------------------------- */
495 INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
497 D1 = D_[Sp + WDS(1)];
498 Sp = Sp + WDS(1) + SIZEOF_StgDouble;
499 jump %ENTRY_CODE(Sp(0));
504 Sp = Sp - WDS(1) - SIZEOF_StgDouble;
505 D_[Sp + WDS(1)] = D1;
506 Sp(0) = stg_gc_d1_info;
511 /*-- L1 contains an int64 ------------------------------------------------- */
513 INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
515 L1 = L_[Sp + WDS(1)];
516 Sp_adj(1) + SIZEOF_StgWord64;
517 jump %ENTRY_CODE(Sp(0));
522 Sp_adj(-1) - SIZEOF_StgWord64;
523 L_[Sp + WDS(1)] = L1;
524 Sp(0) = stg_gc_l1_info;
528 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
530 INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused )
533 // one ptr is on the stack (Sp(0))
534 jump %ENTRY_CODE(Sp(1));
537 /* -----------------------------------------------------------------------------
538 Generic function entry heap check code.
540 At a function entry point, the arguments are as per the calling convention,
541 i.e. some in regs and some on the stack. There may or may not be
542 a pointer to the function closure in R1 - if there isn't, then the heap
543 check failure code in the function will arrange to load it.
545 The function's argument types are described in its info table, so we
546 can just jump to this bit of generic code to save away all the
547 registers and return to the scheduler.
549 This code arranges the stack like this:
553 +---------------------+
555 +---------------------+
557 +---------------------+
559 +---------------------+
561 The size is the number of words of arguments on the stack, and is cached
562 in the frame in order to simplify stack walking: otherwise the size of
563 this stack frame would have to be calculated by looking at f's info table.
565 -------------------------------------------------------------------------- */
573 info = %GET_FUN_INFO(UNTAG(R1));
576 type = TO_W_(StgFunInfoExtra_fun_type(info));
577 if (type == ARG_GEN) {
578 size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
580 if (type == ARG_GEN_BIG) {
581 #ifdef TABLES_NEXT_TO_CODE
582 // bitmap field holds an offset
583 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
584 + %GET_ENTRY(UNTAG(R1)) /* ### */ );
586 size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
589 size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
594 // we don't have to save any registers away
598 Sp(0) = stg_gc_fun_info;
602 type = TO_W_(StgFunInfoExtra_fun_type(info));
604 if (type == ARG_GEN || type == ARG_GEN_BIG) {
605 // regs already saved by the heap check code
609 Sp(0) = stg_gc_fun_info;
610 // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
613 jump W_[stg_stack_save_entries + WDS(type)];
614 // jumps to stg_gc_noregs after saving stuff
616 #endif /* !NO_ARG_REGS */
619 /* -----------------------------------------------------------------------------
620 Generic Apply (return point)
622 The dual to stg_fun_gc_gen (above): this fragment returns to the
623 function, passing arguments in the stack and in registers
624 appropriately. The stack layout is given above.
625 -------------------------------------------------------------------------- */
627 INFO_TABLE_RET( stg_gc_fun, RET_FUN )
632 // Minor optimisation: there are no argument registers to load up,
633 // so we can just jump straight to the function's entry point.
634 jump %GET_ENTRY(UNTAG(R1));
639 info = %GET_FUN_INFO(UNTAG(R1));
640 type = TO_W_(StgFunInfoExtra_fun_type(info));
641 if (type == ARG_GEN || type == ARG_GEN_BIG) {
642 jump StgFunInfoExtra_slow_apply(info);
644 if (type == ARG_BCO) {
645 // cover this case just to be on the safe side
648 Sp(0) = stg_apply_interp_info;
649 jump stg_yield_to_interpreter;
651 jump W_[stg_ap_stack_entries + WDS(type)];
657 /* -----------------------------------------------------------------------------
658 Generic Heap Check Code.
660 Called with Liveness mask in R9, Return address in R10.
661 Stack must be consistent (containing all necessary info pointers
664 See StgMacros.h for a description of the RET_DYN stack frame.
666 We also define an stg_gen_yield here, because it's very similar.
667 -------------------------------------------------------------------------- */
669 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
670 // on a 64-bit machine, we'll end up wasting a couple of words, but
671 // it's not a big deal.
673 #define RESTORE_EVERYTHING \
674 L1 = L_[Sp + WDS(19)]; \
675 D2 = D_[Sp + WDS(17)]; \
676 D1 = D_[Sp + WDS(15)]; \
677 F4 = F_[Sp + WDS(14)]; \
678 F3 = F_[Sp + WDS(13)]; \
679 F2 = F_[Sp + WDS(12)]; \
680 F1 = F_[Sp + WDS(11)]; \
691 #define RET_OFFSET (-19)
693 #define SAVE_EVERYTHING \
695 L_[Sp + WDS(19)] = L1; \
696 D_[Sp + WDS(17)] = D2; \
697 D_[Sp + WDS(15)] = D1; \
698 F_[Sp + WDS(14)] = F4; \
699 F_[Sp + WDS(13)] = F3; \
700 F_[Sp + WDS(12)] = F2; \
701 F_[Sp + WDS(11)] = F1; \
710 Sp(2) = R10; /* return address */ \
711 Sp(1) = R9; /* liveness mask */ \
712 Sp(0) = stg_gc_gen_info;
714 INFO_TABLE_RET( stg_gc_gen, RET_DYN )
715 /* bitmap in the above info table is unused, the real one is on the stack. */
718 jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
727 // A heap check at an unboxed tuple return point. The return address
728 // is on the stack, and we can find it by using the offsets given
729 // to us in the liveness mask.
732 R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
738 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
739 * because we've just failed doYouWantToGC(), not a standard heap
740 * check. GC_GENERIC would end up returning StackOverflow.
748 /* -----------------------------------------------------------------------------
750 -------------------------------------------------------------------------- */
763 /* -----------------------------------------------------------------------------
764 Yielding to the interpreter... top of stack says what to do next.
765 -------------------------------------------------------------------------- */
767 stg_yield_to_interpreter
769 YIELD_TO_INTERPRETER;
772 /* -----------------------------------------------------------------------------
774 -------------------------------------------------------------------------- */
791 Sp(0) = stg_enter_info;
795 /* -----------------------------------------------------------------------------
796 * takeMVar/putMVar-specific blocks
798 * Stack layout for a thread blocked in takeMVar:
802 * stg_block_takemvar_info
804 * Stack layout for a thread blocked in putMVar:
809 * stg_block_putmvar_info
811 * See PrimOps.hc for a description of the workings of take/putMVar.
813 * -------------------------------------------------------------------------- */
815 INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused )
819 jump takeMVarzh_fast;
822 // code fragment executed just before we return to the scheduler
823 stg_block_takemvar_finally
826 unlockClosure(R3, stg_EMPTY_MVAR_info);
835 Sp(0) = stg_block_takemvar_info;
837 BLOCK_BUT_FIRST(stg_block_takemvar_finally);
840 INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 )
848 // code fragment executed just before we return to the scheduler
849 stg_block_putmvar_finally
852 unlockClosure(R3, stg_FULL_MVAR_info);
862 Sp(0) = stg_block_putmvar_info;
864 BLOCK_BUT_FIRST(stg_block_putmvar_finally);
867 // code fragment executed just before we return to the scheduler
868 stg_block_blackhole_finally
870 #if defined(THREADED_RTS)
871 // The last thing we do is release sched_lock, which is
872 // preventing other threads from accessing blackhole_queue and
873 // picking up this thread before we are finished with it.
874 foreign "C" RELEASE_LOCK(sched_mutex "ptr");
883 Sp(0) = stg_enter_info;
884 BLOCK_BUT_FIRST(stg_block_blackhole_finally);
887 INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused )
892 jump killThreadzh_fast;
895 stg_block_throwto_finally
898 foreign "C" throwToReleaseTarget (R3 "ptr");
908 Sp(0) = stg_block_throwto_info;
909 BLOCK_BUT_FIRST(stg_block_throwto_finally);
912 #ifdef mingw32_HOST_OS
913 INFO_TABLE_RET( stg_block_async, RET_SMALL )
918 ares = StgTSO_block_info(CurrentTSO);
919 len = StgAsyncIOResult_len(ares);
920 errC = StgAsyncIOResult_errCode(ares);
921 StgTSO_block_info(CurrentTSO) = NULL;
922 foreign "C" free(ares "ptr");
925 jump %ENTRY_CODE(Sp(1));
931 Sp(0) = stg_block_async_info;
935 /* Used by threadDelay implementation; it would be desirable to get rid of
936 * this free()'ing void return continuation.
938 INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
942 ares = StgTSO_block_info(CurrentTSO);
943 StgTSO_block_info(CurrentTSO) = NULL;
944 foreign "C" free(ares "ptr");
946 jump %ENTRY_CODE(Sp(0));
952 Sp(0) = stg_block_async_void_info;
958 /* -----------------------------------------------------------------------------
960 -------------------------------------------------------------------------- */
962 stg_block_stmwait_finally
964 foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
970 BLOCK_BUT_FIRST(stg_block_stmwait_finally);