1 /* -----------------------------------------------------------------------------
2 * $Id: HeapStackCheck.hc,v 1.26 2002/03/02 17:43:44 sof Exp $
4 * (c) The GHC Team, 1998-1999
6 * Canned Heap-Check and Stack-Check sequences.
8 * ---------------------------------------------------------------------------*/
12 #include "Storage.h" /* for CurrentTSO */
13 #include "StgRun.h" /* for StgReturn and register saving */
14 #include "Schedule.h" /* for context_switch */
16 /* Stack/Heap Check Failure
17 * ------------------------
19 * On discovering that a stack or heap check has failed, we do the following:
21 * - If the context_switch flag is set, indicating that there are more
22 * threads waiting to run, we yield to the scheduler
23 * (return ThreadYielding).
25 * - If Hp > HpLim, we've had a heap check failure. This means we've
26 * come to the end of the current heap block, so we try to chain
27 * another block on with ExtendNursery().
29 * - If this succeeds, we carry on without returning to the
32 * - If it fails, we return to the scheduler claiming HeapOverflow
33 * so that a garbage collection can be performed.
35 * - If Hp <= HpLim, it must have been a stack check that failed. In
36 * which case, we return to the scheduler claiming StackOverflow, the
37 * scheduler will either increase the size of our stack, or flag
38 * an error if the stack is already too big.
40 * The effect of checking for context switch only in the heap/stack check
41 * failure code is that we'll switch threads after the current thread has
42 * reached the end of its heap block. If a thread isn't allocating
43 * at all, it won't yield. Hopefully this won't be a problem in practice.
46 /* Remember that the return address is *removed* when returning to a
47 * ThreadRunGHC thread.
53 if (HpAlloc <= BLOCK_SIZE_W && ExtendNursery(Hp,HpLim)) {\
54 if (context_switch) { \
55 R1.i = ThreadYielding; \
58 JMP_(ENTRY_CODE(Sp[-1])); \
61 R1.i = HeapOverflow; \
64 R1.i = StackOverflow; \
67 CurrentTSO->what_next = ThreadRunGHC; \
73 if (HpAlloc <= BLOCK_SIZE_W && ExtendNursery(Hp,HpLim)) {\
74 if (context_switch) { \
75 R1.i = ThreadYielding; \
79 JMP_(ENTRY_CODE(*R1.p)); \
82 R1.i = HeapOverflow; \
85 R1.i = StackOverflow; \
88 CurrentTSO->what_next = ThreadEnterGHC; \
93 CurrentTSO->what_next = ThreadRunGHC; \
94 R1.i = HeapOverflow; \
99 CurrentTSO->what_next = ThreadRunGHC; \
100 R1.i = StackOverflow; \
103 #define YIELD_GENERIC \
105 CurrentTSO->what_next = ThreadRunGHC; \
106 R1.i = ThreadYielding; \
109 #define YIELD_TO_INTERPRETER \
111 CurrentTSO->what_next = ThreadEnterInterp; \
112 R1.i = ThreadYielding; \
115 #define BLOCK_GENERIC \
117 CurrentTSO->what_next = ThreadRunGHC; \
118 R1.i = ThreadBlocked; \
121 #define BLOCK_ENTER \
123 CurrentTSO->what_next = ThreadEnterGHC;\
124 R1.i = ThreadBlocked; \
127 /* -----------------------------------------------------------------------------
129 -------------------------------------------------------------------------- */
132 * This one is used when we want to *enter* the top thing on the stack
133 * when we return, instead of just returning to an address. See
134 * UpdatePAP for an example.
137 EXTFUN(stg_gc_entertop)
144 /* -----------------------------------------------------------------------------
145 Heap checks in non-top-level thunks/functions.
147 In these cases, node always points to the function closure. This gives
148 us an easy way to return to the function: just leave R1 on the top of
149 the stack, and have the scheduler enter it to return.
151 There are canned sequences for 'n' pointer values in registers.
152 -------------------------------------------------------------------------- */
154 EXTFUN(__stg_gc_enter_1)
163 EXTFUN(stg_gc_enter_1_hponly)
170 CurrentTSO->what_next = ThreadEnterGHC;
175 /*- 2 Regs--------------------------------------------------------------------*/
177 EXTFUN(stg_gc_enter_2)
187 /*- 3 Regs -------------------------------------------------------------------*/
189 EXTFUN(stg_gc_enter_3)
200 /*- 4 Regs -------------------------------------------------------------------*/
202 EXTFUN(stg_gc_enter_4)
214 /*- 5 Regs -------------------------------------------------------------------*/
216 EXTFUN(stg_gc_enter_5)
229 /*- 6 Regs -------------------------------------------------------------------*/
231 EXTFUN(stg_gc_enter_6)
245 /*- 7 Regs -------------------------------------------------------------------*/
247 EXTFUN(stg_gc_enter_7)
262 /*- 8 Regs -------------------------------------------------------------------*/
264 EXTFUN(stg_gc_enter_8)
282 ToDo: merge the block and yield macros, calling something like BLOCK(N)
287 Should we actually ever do a yield in such a case?? -- HWL
293 CurrentTSO->what_next = ThreadEnterGHC;
294 R1.i = ThreadYielding;
305 CurrentTSO->what_next = ThreadEnterGHC;
306 R1.i = ThreadYielding;
311 /*- 2 Regs--------------------------------------------------------------------*/
320 CurrentTSO->what_next = ThreadEnterGHC;
321 R1.i = ThreadYielding;
326 /*- 3 Regs -------------------------------------------------------------------*/
336 CurrentTSO->what_next = ThreadEnterGHC;
337 R1.i = ThreadYielding;
342 /*- 4 Regs -------------------------------------------------------------------*/
353 CurrentTSO->what_next = ThreadEnterGHC;
354 R1.i = ThreadYielding;
359 /*- 5 Regs -------------------------------------------------------------------*/
371 CurrentTSO->what_next = ThreadEnterGHC;
372 R1.i = ThreadYielding;
377 /*- 6 Regs -------------------------------------------------------------------*/
390 CurrentTSO->what_next = ThreadEnterGHC;
391 R1.i = ThreadYielding;
396 /*- 7 Regs -------------------------------------------------------------------*/
410 CurrentTSO->what_next = ThreadEnterGHC;
411 R1.i = ThreadYielding;
416 /*- 8 Regs -------------------------------------------------------------------*/
431 CurrentTSO->what_next = ThreadEnterGHC;
432 R1.i = ThreadYielding;
437 // the same routines but with a block rather than a yield
445 CurrentTSO->what_next = ThreadEnterGHC;
446 R1.i = ThreadBlocked;
451 /*- 2 Regs--------------------------------------------------------------------*/
460 CurrentTSO->what_next = ThreadEnterGHC;
461 R1.i = ThreadBlocked;
466 /*- 3 Regs -------------------------------------------------------------------*/
476 CurrentTSO->what_next = ThreadEnterGHC;
477 R1.i = ThreadBlocked;
482 /*- 4 Regs -------------------------------------------------------------------*/
493 CurrentTSO->what_next = ThreadEnterGHC;
494 R1.i = ThreadBlocked;
499 /*- 5 Regs -------------------------------------------------------------------*/
511 CurrentTSO->what_next = ThreadEnterGHC;
512 R1.i = ThreadBlocked;
517 /*- 6 Regs -------------------------------------------------------------------*/
530 CurrentTSO->what_next = ThreadEnterGHC;
531 R1.i = ThreadBlocked;
536 /*- 7 Regs -------------------------------------------------------------------*/
550 CurrentTSO->what_next = ThreadEnterGHC;
551 R1.i = ThreadBlocked;
556 /*- 8 Regs -------------------------------------------------------------------*/
571 CurrentTSO->what_next = ThreadEnterGHC;
572 R1.i = ThreadBlocked;
579 #if 0 && defined(PAR)
582 Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
583 saving of the thread state from the actual jump via an StgReturn.
584 We need this separation because we call RTS routines in blocking entry codes
585 before jumping back into the RTS (see parallel/FetchMe.hc).
588 EXTFUN(par_block_1_no_jump)
600 CurrentTSO->what_next = ThreadEnterGHC;
601 R1.i = ThreadBlocked;
608 /* -----------------------------------------------------------------------------
609 For a case expression on a polymorphic or function-typed object, if
610 the default branch (there can only be one branch) of the case fails
611 a heap-check, instead of using stg_gc_enter_1 as normal, we must
612 push a new SEQ frame on the stack, followed by the object returned.
614 Otherwise, if the object is a function, it won't return to the
615 correct activation record on returning from garbage collection. It will
616 assume it has some arguments and apply itself.
617 -------------------------------------------------------------------------- */
622 Sp -= 1 + sizeofW(StgSeqFrame);
623 PUSH_SEQ_FRAME(Sp+1);
629 /* -----------------------------------------------------------------------------
630 Heap checks in Primitive case alternatives
632 A primitive case alternative is entered with a value either in
633 R1, FloatReg1 or D1 depending on the return convention. All the
634 cases are covered below.
635 -------------------------------------------------------------------------- */
637 /*-- No registers live (probably a void return) ----------------------------- */
639 /* If we change the policy for thread startup to *not* remove the
640 * return address from the stack, we can get rid of this little
641 * function/info table...
643 INFO_TABLE_SRT_BITMAP(stg_gc_noregs_info, stg_gc_noregs_ret, 0/*BITMAP*/,
644 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
645 RET_SMALL,, EF_, 0, 0);
647 EXTFUN(stg_gc_noregs_ret)
650 JMP_(ENTRY_CODE(Sp[0]));
654 EXTFUN(stg_gc_noregs)
658 Sp[0] = (W_)&stg_gc_noregs_info;
663 /*-- R1 is boxed/unpointed -------------------------------------------------- */
665 INFO_TABLE_SRT_BITMAP(stg_gc_unpt_r1_info, stg_gc_unpt_r1_ret, 0/*BITMAP*/,
666 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
667 RET_SMALL,, EF_, 0, 0);
669 EXTFUN(stg_gc_unpt_r1_ret)
674 JMP_(ENTRY_CODE(Sp[0]));
678 EXTFUN(stg_gc_unpt_r1)
683 Sp[0] = (W_)&stg_gc_unpt_r1_info;
688 /*-- Unboxed tuple return (unregisterised build only)------------------ */
690 INFO_TABLE_SRT_BITMAP(stg_ut_1_0_unreg_info, stg_ut_1_0_unreg_ret, 0/*BITMAP*/,
691 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
692 RET_SMALL,, EF_, 0, 0);
694 EXTFUN(stg_ut_1_0_unreg_ret)
697 /* R1 is on the stack (*Sp) */
698 JMP_(ENTRY_CODE(Sp[1]));
702 /*-- R1 is unboxed -------------------------------------------------- */
704 INFO_TABLE_SRT_BITMAP(stg_gc_unbx_r1_info, stg_gc_unbx_r1_ret, 1/*BITMAP*/,
705 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
706 RET_SMALL,, EF_, 0, 0);
707 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
709 EXTFUN(stg_gc_unbx_r1_ret)
714 JMP_(ENTRY_CODE(Sp[0]));
718 EXTFUN(stg_gc_unbx_r1)
723 Sp[0] = (W_)&stg_gc_unbx_r1_info;
728 /*-- F1 contains a float ------------------------------------------------- */
730 INFO_TABLE_SRT_BITMAP(stg_gc_f1_info, stg_gc_f1_ret, 1/*BITMAP*/,
731 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
732 RET_SMALL,, EF_, 0, 0);
734 EXTFUN(stg_gc_f1_ret)
739 JMP_(ENTRY_CODE(Sp[0]));
747 ASSIGN_FLT(Sp+1, F1);
748 Sp[0] = (W_)&stg_gc_f1_info;
753 /*-- D1 contains a double ------------------------------------------------- */
755 /* we support doubles of either 1 or 2 words in size */
757 #if SIZEOF_DOUBLE == SIZEOF_VOID_P
758 # define DBL_BITMAP 1
760 # define DBL_BITMAP 3
763 INFO_TABLE_SRT_BITMAP(stg_gc_d1_info, stg_gc_d1_ret, DBL_BITMAP,
764 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
765 RET_SMALL,, EF_, 0, 0);
767 EXTFUN(stg_gc_d1_ret)
771 Sp += sizeofW(StgDouble);
772 JMP_(ENTRY_CODE(Sp[0]));
779 Sp -= 1 + sizeofW(StgDouble);
781 Sp[0] = (W_)&stg_gc_d1_info;
787 /*-- L1 contains an int64 ------------------------------------------------- */
789 /* we support int64s of either 1 or 2 words in size */
791 #if SIZEOF_VOID_P == 8
792 # define LLI_BITMAP 1
794 # define LLI_BITMAP 3
797 INFO_TABLE_SRT_BITMAP(stg_gc_l1_info, stg_gc_l1_ret, LLI_BITMAP,
798 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
799 RET_SMALL,, EF_, 0, 0);
801 EXTFUN(stg_gc_l1_ret)
805 Sp += sizeofW(StgWord64);
806 JMP_(ENTRY_CODE(Sp[0]));
813 Sp -= 1 + sizeofW(StgWord64);
814 ASSIGN_Int64(Sp+1,L1);
815 Sp[0] = (W_)&stg_gc_l1_info;
820 /* -----------------------------------------------------------------------------
821 Heap checks for unboxed tuple case alternatives
825 - for an unboxed tuple with n components, we rearrange the components
826 with pointers first followed by non-pointers. (NB: not done yet)
828 - The first k components are allocated registers, where k is the
829 number of components that will fit in real registers.
831 - The rest are placed on the stack, with space left for tagging
832 of the non-pointer block if necessary.
834 - On failure of a heap check:
835 - the tag is filled in if necessary,
836 - we load Ri with the address of the continuation,
837 where i is the lowest unused vanilla register.
838 - jump to 'stg_gc_ut_x_y' where x is the number of pointer
839 registers and y the number of non-pointers.
840 - if the required canned sequence isn't available, it will
841 have to be generated at compile-time by the code
842 generator (this will probably happen if there are
843 floating-point values, for instance).
845 For now, just deal with R1, hence R2 contains the sequel address.
846 -------------------------------------------------------------------------- */
848 /*---- R1 contains a pointer: ------ */
850 INFO_TABLE_SRT_BITMAP(stg_gc_ut_1_0_info, stg_gc_ut_1_0_ret, 1/*BITMAP*/,
851 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
852 RET_SMALL,, EF_, 0, 0);
854 EXTFUN(stg_gc_ut_1_0_ret)
859 JMP_(ENTRY_CODE(Sp[-2]));
863 EXTFUN(stg_gc_ut_1_0)
869 Sp[0] = (W_)&stg_gc_ut_1_0_info;
874 /*---- R1 contains a non-pointer: ------ */
876 INFO_TABLE_SRT_BITMAP(stg_gc_ut_0_1_info, stg_gc_ut_0_1_ret, 3/*BITMAP*/,
877 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
878 RET_SMALL,, EF_, 0, 0);
880 EXTFUN(stg_gc_ut_0_1_ret)
885 JMP_(ENTRY_CODE(Sp[-2]));
889 EXTFUN(stg_gc_ut_0_1)
893 Sp[0] = (W_)&stg_gc_ut_0_1_info;
900 /* -----------------------------------------------------------------------------
901 Standard top-level fast-entry heap checks.
903 - we want to make the stack look like it should at the slow entry
904 point for the function. That way we can just push the slow
905 entry point on the stack and return using ThreadRunGHC.
907 - The compiler will generate code to fill in any tags on the stack,
908 in case we arrived directly at the fast entry point and these tags
911 - The rest is hopefully handled by jumping to a canned sequence.
912 We currently have canned sequences for 0-8 pointer registers. If
913 any registers contain non-pointers, we must reduce to an all-pointers
914 situation by pushing as many registers on the stack as necessary.
916 eg. if R1, R2 contain pointers and R3 contains a word, the heap check
917 failure sequence looks like this:
924 after pushing R3, we have pointers in R1 and R2 which corresponds
925 to the 2-pointer canned sequence.
927 -------------------------------------------------------------------------- */
929 /*- 0 Regs -------------------------------------------------------------------*/
940 /*- 1 Reg --------------------------------------------------------------------*/
952 /*- 1 Reg (non-ptr) ----------------------------------------------------------*/
959 Sp[1] = WORD_TAG; /* ToDo: or maybe its an int? */
965 /*- 2 Regs--------------------------------------------------------------------*/
978 /*- 3 Regs -------------------------------------------------------------------*/
992 /*- 4 Regs -------------------------------------------------------------------*/
1007 /*- 5 Regs -------------------------------------------------------------------*/
1023 /*- 6 Regs -------------------------------------------------------------------*/
1040 /*- 7 Regs -------------------------------------------------------------------*/
1058 /*- 8 Regs -------------------------------------------------------------------*/
1077 /* -----------------------------------------------------------------------------
1078 Generic Heap Check Code.
1080 Called with Liveness mask in R9, Return address in R10.
1081 Stack must be consistent (tagged, and containing all necessary info pointers
1084 We also define an stg_gen_yield here, because it's very similar.
1085 -------------------------------------------------------------------------- */
1087 #if SIZEOF_DOUBLE > SIZEOF_VOID_P
1089 #define RESTORE_EVERYTHING \
1090 D2 = PK_DBL(Sp+16); \
1091 D1 = PK_DBL(Sp+14); \
1092 F4 = PK_FLT(Sp+13); \
1093 F3 = PK_FLT(Sp+12); \
1094 F2 = PK_FLT(Sp+11); \
1095 F1 = PK_FLT(Sp+10); \
1106 #define RET_OFFSET (-17)
1108 #define SAVE_EVERYTHING \
1109 ASSIGN_DBL(Sp-2,D2); \
1110 ASSIGN_DBL(Sp-4,D1); \
1111 ASSIGN_FLT(Sp-5,F4); \
1112 ASSIGN_FLT(Sp-6,F3); \
1113 ASSIGN_FLT(Sp-7,F2); \
1114 ASSIGN_FLT(Sp-8,F1); \
1123 Sp[-17] = R10.w; /* return address */ \
1124 Sp[-18] = R9.w; /* liveness mask */ \
1125 Sp[-19] = (W_)&stg_gen_chk_info; \
1130 #define RESTORE_EVERYTHING \
1131 D2 = PK_DBL(Sp+15); \
1132 D1 = PK_DBL(Sp+14); \
1133 F4 = PK_FLT(Sp+13); \
1134 F3 = PK_FLT(Sp+12); \
1135 F2 = PK_FLT(Sp+11); \
1136 F1 = PK_FLT(Sp+10); \
1147 #define RET_OFFSET (-15)
1149 #define SAVE_EVERYTHING \
1150 ASSIGN_DBL(Sp-1,D2); \
1151 ASSIGN_DBL(Sp-2,D1); \
1152 ASSIGN_FLT(Sp-3,F4); \
1153 ASSIGN_FLT(Sp-4,F3); \
1154 ASSIGN_FLT(Sp-5,F2); \
1155 ASSIGN_FLT(Sp-6,F1); \
1164 Sp[-15] = R10.w; /* return address */ \
1165 Sp[-16] = R9.w; /* liveness mask */ \
1166 Sp[-17] = (W_)&stg_gen_chk_info; \
1171 INFO_TABLE_SRT_BITMAP(stg_gen_chk_info, stg_gen_chk_ret, 0,
1172 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
1173 RET_DYN,, EF_, 0, 0);
1175 /* bitmap in the above info table is unused, the real one is on the stack.
1178 FN_(stg_gen_chk_ret)
1182 JMP_(Sp[RET_OFFSET]); /* NO ENTRY_CODE() - this is a direct ret address */
1195 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
1196 * because we've just failed doYouWantToGC(), not a standard heap
1197 * check. GC_GENERIC would end up returning StackOverflow.
1207 /* -----------------------------------------------------------------------------
1209 -------------------------------------------------------------------------- */
1219 FN_(stg_yield_noregs)
1223 Sp[0] = (W_)&stg_gc_noregs_info;
1228 FN_(stg_yield_to_interpreter)
1231 /* No need to save everything - no live registers */
1232 YIELD_TO_INTERPRETER
1236 /* -----------------------------------------------------------------------------
1238 -------------------------------------------------------------------------- */
1248 FN_(stg_block_noregs)
1252 Sp[0] = (W_)&stg_gc_noregs_info;
1266 /* -----------------------------------------------------------------------------
1267 * takeMVar/putMVar-specific blocks
1269 * Stack layout for a thread blocked in takeMVar:
1273 * stg_block_takemvar_info
1275 * Stack layout for a thread blocked in putMVar:
1280 * stg_block_putmvar_info
1282 * See PrimOps.hc for a description of the workings of take/putMVar.
1284 * -------------------------------------------------------------------------- */
1286 INFO_TABLE_SRT_BITMAP(stg_block_takemvar_info, stg_block_takemvar_ret,
1287 0/*BITMAP*/, 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
1288 RET_SMALL,, IF_, 0, 0);
1290 IF_(stg_block_takemvar_ret)
1295 JMP_(takeMVarzh_fast);
1299 FN_(stg_block_takemvar)
1304 Sp[0] = (W_)&stg_block_takemvar_info;
1309 INFO_TABLE_SRT_BITMAP(stg_block_putmvar_info, stg_block_putmvar_ret,
1310 0/*BITMAP*/, 0/*SRT*/, 0/*SRT_OFF*/, 0/*SRT_LEN*/,
1311 RET_SMALL,, IF_, 0, 0);
1313 IF_(stg_block_putmvar_ret)
1319 JMP_(putMVarzh_fast);
1323 FN_(stg_block_putmvar)
1329 Sp[0] = (W_)&stg_block_putmvar_info;