/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_unlock;
#endif
import EnterCriticalSection;
import LeaveCriticalSection;

/* Stack/Heap Check Failure
 * ------------------------
 *
 * On discovering that a stack or heap check has failed, we do the following:
 *
 *    - If HpLim==0, indicating that we should context-switch, we yield
 *      to the scheduler (return ThreadYielding).
 *
 *    - If the context_switch flag is set (the backup plan if setting HpLim
 *      to 0 didn't trigger a context switch), we yield to the scheduler
 *      (return ThreadYielding).
 *
 *    - If Hp > HpLim, we've had a heap check failure.  This means we've
 *      come to the end of the current heap block, so we try to chain
 *      another block on with ExtendNursery().
 *
 *           - If this succeeds, we carry on without returning to the
 *             scheduler.
 *
 *           - If it fails, we return to the scheduler claiming HeapOverflow
 *             so that a garbage collection can be performed.
 *
 *    - If Hp <= HpLim, it must have been a stack check that failed, in
 *      which case we return to the scheduler claiming StackOverflow; the
 *      scheduler will either increase the size of our stack, or raise
 *      an exception if the stack is already too big.
 *
 * The effect of checking for context switch only in the heap/stack check
 * failure code is that we'll switch threads after the current thread has
 * reached the end of its heap block.  If a thread isn't allocating
 * at all, it won't yield.  Hopefully this won't be a problem in practice.
 */

#define PRE_RETURN(why,what_next)                       \
  StgTSO_what_next(CurrentTSO) = what_next::I16;        \
  StgRegTable_rRet(BaseReg) = why;                      \
  R1 = BaseReg;

/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */

#define GC_GENERIC                                              \
    DEBUG_ONLY(foreign "C" heapCheckFail());                    \
    if (Hp > HpLim) {                                           \
        Hp = Hp - HpAlloc/*in bytes*/;                          \
        if (HpLim == 0) {                                       \
                R1 = ThreadYielding;                            \
                goto sched;                                     \
        }                                                       \
        if (HpAlloc <= BLOCK_SIZE                               \
            && bdescr_link(CurrentNursery) != NULL) {           \
            CLOSE_NURSERY();                                    \
            CurrentNursery = bdescr_link(CurrentNursery);       \
            OPEN_NURSERY();                                     \
            if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \
                R1 = ThreadYielding;                            \
                goto sched;                                     \
            } else {                                            \
                jump %ENTRY_CODE(Sp(0));                        \
            }                                                   \
        } else {                                                \
            R1 = HeapOverflow;                                  \
            goto sched;                                         \
        }                                                       \
    } else {                                                    \
        R1 = StackOverflow;                                     \
    }                                                           \
  sched:                                                        \
    PRE_RETURN(R1,ThreadRunGHC);                                \
    jump stg_returnToSched;

#define HP_GENERIC                              \
  PRE_RETURN(HeapOverflow, ThreadRunGHC)        \
  jump stg_returnToSched;

#define BLOCK_GENERIC                           \
  PRE_RETURN(ThreadBlocked, ThreadRunGHC)       \
  jump stg_returnToSched;

#define YIELD_GENERIC                           \
  PRE_RETURN(ThreadYielding, ThreadRunGHC)      \
  jump stg_returnToSched;

#define BLOCK_BUT_FIRST(c)                      \
  PRE_RETURN(ThreadBlocked, ThreadRunGHC)       \
  R2 = c;                                       \
  jump stg_returnToSchedButFirst;

#define YIELD_TO_INTERPRETER                    \
  PRE_RETURN(ThreadYielding, ThreadInterpret)   \
  jump stg_returnToSchedNotPaused;

/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    ENTER();
}

__stg_gc_enter_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    GC_GENERIC
}
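
/* Note: for context, a compiler-generated heap check that falls into
 * __stg_gc_enter_1 looks roughly like the following (an illustrative
 * sketch only, with a made-up label and allocation size; the real code
 * is emitted by the code generator):
 *
 *     some_closure_entry
 *     {
 *         Hp = Hp + 16;              // reserve the words we will allocate
 *         if (Hp > HpLim) {
 *             HpAlloc = 16;          // GC_GENERIC retracts Hp by HpAlloc
 *             jump __stg_gc_enter_1; // R1 still points to the closure
 *         }
 *         ... allocate and build the result ...
 *     }
 */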

#if defined(GRAN)
/*
  ToDo: merge the block and yield macros, calling something like BLOCK(N)
        at the end;
*/

/*
   Should we actually ever do a yield in such a case?? -- HWL
*/
gran_yield_0
{
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

gran_yield_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 2 Regs--------------------------------------------------------------------*/

gran_yield_2
{
    Sp_adj(-2);
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 3 Regs -------------------------------------------------------------------*/

gran_yield_3
{
    Sp_adj(-3);
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 4 Regs -------------------------------------------------------------------*/

gran_yield_4
{
    Sp_adj(-4);
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 5 Regs -------------------------------------------------------------------*/

gran_yield_5
{
    Sp_adj(-5);
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 6 Regs -------------------------------------------------------------------*/

gran_yield_6
{
    Sp_adj(-6);
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 7 Regs -------------------------------------------------------------------*/

gran_yield_7
{
    Sp_adj(-7);
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 8 Regs -------------------------------------------------------------------*/

gran_yield_8
{
    Sp_adj(-8);
    Sp(7) = R8;
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

// the same routines but with a block rather than a yield

gran_block_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 2 Regs--------------------------------------------------------------------*/

gran_block_2
{
    Sp_adj(-2);
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 3 Regs -------------------------------------------------------------------*/

gran_block_3
{
    Sp_adj(-3);
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 4 Regs -------------------------------------------------------------------*/

gran_block_4
{
    Sp_adj(-4);
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 5 Regs -------------------------------------------------------------------*/

gran_block_5
{
    Sp_adj(-5);
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 6 Regs -------------------------------------------------------------------*/

gran_block_6
{
    Sp_adj(-6);
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 7 Regs -------------------------------------------------------------------*/

gran_block_7
{
    Sp_adj(-7);
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 8 Regs -------------------------------------------------------------------*/

gran_block_8
{
    Sp_adj(-8);
    Sp(7) = R8;
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif

#if 0 && defined(PAR)

/*
  Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
  saving of the thread state from the actual jump via an StgReturn.
  We need this separation because we call RTS routines in blocking entry codes
  before jumping back into the RTS (see parallel/FetchMe.hc).
*/

par_block_1_no_jump
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
}

par_jump
{
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif

/* -----------------------------------------------------------------------------
   Heap checks in Primitive case alternatives

   A primitive case alternative is entered with a value either in
   R1, FloatReg1 or D1 depending on the return convention.  All the
   cases are covered below.
   -------------------------------------------------------------------------- */

/*-- No Registers live ------------------------------------------------------ */

stg_gc_noregs
{
    GC_GENERIC
}

/*-- void return ------------------------------------------------------------ */

INFO_TABLE_RET( stg_gc_void, RET_SMALL)
{
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

/*-- R1 is boxed/unpointed -------------------------------------------------- */

INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unpt_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unpt_r1_info;
    GC_GENERIC
}

/*-- R1 is unboxed -------------------------------------------------- */

/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unbx_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unbx_r1_info;
    GC_GENERIC
}

/*-- F1 contains a float ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
{
    F1 = F_[Sp+WDS(1)];
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_f1
{
    Sp_adj(-2);
    F_[Sp + WDS(1)] = F1;
    Sp(0) = stg_gc_f1_info;
    GC_GENERIC
}

/*-- D1 contains a double ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
{
    D1 = D_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgDouble;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_d1
{
    Sp = Sp - WDS(1) - SIZEOF_StgDouble;
    D_[Sp + WDS(1)] = D1;
    Sp(0) = stg_gc_d1_info;
    GC_GENERIC
}


/*-- L1 contains an int64 ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
{
    L1 = L_[Sp + WDS(1)];
    Sp_adj(1) + SIZEOF_StgWord64;   // Sp_adj(n) expands to Sp = Sp + WDS(n),
                                    // so this also pops the 64-bit slot
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_l1
{
    Sp_adj(-1) - SIZEOF_StgWord64;  // i.e. Sp = Sp - WDS(1) - SIZEOF_StgWord64
    L_[Sp + WDS(1)] = L1;
    Sp(0) = stg_gc_l1_info;
    GC_GENERIC
}

/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */

INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused )
{
    Sp_adj(1);
    // one ptr is on the stack (Sp(0))
    jump %ENTRY_CODE(Sp(1));
}

/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be
   a pointer to the function closure in R1 - if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.

   This code arranges the stack like this:

         |        ....         |
         |        args         |
         +---------------------+
         |      f_closure      |
         +---------------------+
         |        size         |
         +---------------------+
         |   stg_gc_fun_info   |
         +---------------------+

   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */

__stg_gc_fun
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
        size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else {
        if (type == ARG_GEN_BIG) {
#ifdef TABLES_NEXT_TO_CODE
            // bitmap field holds an offset
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
                                        + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
        } else {
            size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
        }
    }

#ifdef NO_ARG_REGS
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    GC_GENERIC
#else
    W_ type;
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    // cache the size
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        GC_GENERIC
    } else {
        jump W_[stg_stack_save_entries + WDS(type)];
            // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}
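
/* Note: an illustrative sketch (made-up name and allocation size) of how a
 * function's heap-check failure code reaches __stg_gc_fun.  The arguments
 * are already in registers/on the stack as per the calling convention, and
 * the failure code reloads R1 with the function closure if necessary:
 *
 *     f_entry
 *     {
 *         Hp = Hp + 24;
 *         if (Hp > HpLim) {
 *             HpAlloc = 24;
 *             R1 = f_closure;    // make sure the closure is in R1
 *             jump __stg_gc_fun; // saves the args and builds the frame above
 *         }
 *         ... function body ...
 *     }
 */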

/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to __stg_gc_fun (above): this fragment returns to the
   function, passing arguments in the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_gc_fun, RET_FUN )
{
    R1 = Sp(2);
    Sp_adj(3);
#ifdef NO_ARG_REGS
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(UNTAG(R1));
#else
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        jump StgFunInfoExtra_slow_apply(info);
    } else {
        if (type == ARG_BCO) {
            // cover this case just to be on the safe side
            Sp_adj(-2);
            Sp(1) = R1;
            Sp(0) = stg_apply_interp_info;
            jump stg_yield_to_interpreter;
        } else {
            jump W_[stg_ap_stack_entries + WDS(type)];
        }
    }
#endif
}

/* -----------------------------------------------------------------------------
   Generic Heap Check Code.

   Called with Liveness mask in R9,  Return address in R10.
   Stack must be consistent (containing all necessary info pointers
   to relevant SRTs).

   See StgMacros.h for a description of the RET_DYN stack frame.

   We also define an stg_gen_yield here, because it's very similar.
   -------------------------------------------------------------------------- */

// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
// on a 64-bit machine, we'll end up wasting a couple of words, but
// it's not a big deal.

#define RESTORE_EVERYTHING                      \
    L1   = L_[Sp + WDS(19)];                    \
    D2   = D_[Sp + WDS(17)];                    \
    D1   = D_[Sp + WDS(15)];                    \
    F4   = F_[Sp + WDS(14)];                    \
    F3   = F_[Sp + WDS(13)];                    \
    F2   = F_[Sp + WDS(12)];                    \
    F1   = F_[Sp + WDS(11)];                    \
    R8 = Sp(10);                                \
    R7 = Sp(9);                                 \
    R6 = Sp(8);                                 \
    R5 = Sp(7);                                 \
    R4 = Sp(6);                                 \
    R3 = Sp(5);                                 \
    R2 = Sp(4);                                 \
    R1 = Sp(3);                                 \
    Sp_adj(21);

#define RET_OFFSET (-19)

#define SAVE_EVERYTHING                         \
    Sp_adj(-21);                                \
    L_[Sp + WDS(19)] = L1;                      \
    D_[Sp + WDS(17)] = D2;                      \
    D_[Sp + WDS(15)] = D1;                      \
    F_[Sp + WDS(14)] = F4;                      \
    F_[Sp + WDS(13)] = F3;                      \
    F_[Sp + WDS(12)] = F2;                      \
    F_[Sp + WDS(11)] = F1;                      \
    Sp(10) = R8;                                \
    Sp(9) = R7;                                 \
    Sp(8) = R6;                                 \
    Sp(7) = R5;                                 \
    Sp(6) = R4;                                 \
    Sp(5) = R3;                                 \
    Sp(4) = R2;                                 \
    Sp(3) = R1;                                 \
    Sp(2) = R10;    /* return address */        \
    Sp(1) = R9;     /* liveness mask  */        \
    Sp(0) = stg_gc_gen_info;

INFO_TABLE_RET( stg_gc_gen, RET_DYN )
/* bitmap in the above info table is unused, the real one is on the stack. */
{
    RESTORE_EVERYTHING;
    jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
}

stg_gc_gen
{
    SAVE_EVERYTHING;
    GC_GENERIC
}

// A heap check at an unboxed tuple return point.  The return address
// is on the stack, and we can find it by using the offsets given
// to us in the liveness mask.
stg_gc_ut
{
    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
    SAVE_EVERYTHING;
    GC_GENERIC
}

/*
 * stg_gc_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
 * because we've just failed doYouWantToGC(), not a standard heap
 * check.  GC_GENERIC would end up returning StackOverflow.
 */
stg_gc_gen_hp
{
    SAVE_EVERYTHING;
    HP_GENERIC
}

/* -----------------------------------------------------------------------------
   Yields
   -------------------------------------------------------------------------- */

stg_gen_yield
{
    SAVE_EVERYTHING;
    YIELD_GENERIC
}

stg_yield_noregs
{
    YIELD_GENERIC;
}

/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}

/* -----------------------------------------------------------------------------
   Blocks
   -------------------------------------------------------------------------- */

stg_gen_block
{
    SAVE_EVERYTHING;
    BLOCK_GENERIC;
}

stg_block_noregs
{
    BLOCK_GENERIC;
}

stg_block_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}

/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar:
 *
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info
 *
 * Stack layout for a thread blocked in putMVar:
 *
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.cmm for a description of the workings of take/putMVar.
 *
 * -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump takeMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_takemvar
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}

INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump putMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_putmvar
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_putmvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
}
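
/* Note: a much-simplified sketch of the primop side (the real code lives in
 * PrimOps.cmm; locking, queue manipulation and the error cases are omitted,
 * and "<MVar is empty>" is pseudocode).  When the MVar is empty,
 * takeMVarzh_fast enqueues the current TSO and then uses the canned block
 * sequence above:
 *
 *     takeMVarzh_fast
 *     {
 *         // R1 = the MVar closure
 *         if (<MVar is empty>) {
 *             // ... add CurrentTSO to the MVar's blocking queue ...
 *             jump stg_block_takemvar;  // saves R1, builds the frame shown
 *                                       // above, runs the "finally" fragment,
 *                                       // and returns ThreadBlocked
 *         }
 *         // ... otherwise take the value and return ...
 *     }
 */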

// code fragment executed just before we return to the scheduler
stg_block_blackhole_finally
{
#if defined(THREADED_RTS)
    // The last thing we do is release sched_mutex, which is
    // preventing other threads from accessing blackhole_queue and
    // picking up this thread before we are finished with it.
    RELEASE_LOCK(sched_mutex "ptr");
#endif
    jump StgReturn;
}

stg_block_blackhole
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_BUT_FIRST(stg_block_blackhole_finally);
}

INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump killThreadzh_fast;
}

stg_block_throwto_finally
{
#ifdef THREADED_RTS
    foreign "C" throwToReleaseTarget (R3 "ptr");
#endif
    jump StgReturn;
}

stg_block_throwto
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_throwto_info;
    BLOCK_BUT_FIRST(stg_block_throwto_finally);
}

#ifdef mingw32_HOST_OS
INFO_TABLE_RET( stg_block_async, RET_SMALL )
{
    W_ ares;
    W_ len, errC;

    ares = StgTSO_block_info(CurrentTSO);
    len = StgAsyncIOResult_len(ares);
    errC = StgAsyncIOResult_errCode(ares);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    R1 = len;
    Sp(0) = errC;
    jump %ENTRY_CODE(Sp(1));
}

stg_block_async
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}

/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
{
    W_ ares;

    ares = StgTSO_block_info(CurrentTSO);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

stg_block_async_void
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif

/* -----------------------------------------------------------------------------
   STM-specific waiting
   -------------------------------------------------------------------------- */

stg_block_stmwait_finally
{
    foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
    jump StgReturn;
}

stg_block_stmwait
{
    BLOCK_BUT_FIRST(stg_block_stmwait_finally);
}