/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_unlock;
#endif
import EnterCriticalSection;
import LeaveCriticalSection;

/* Stack/Heap Check Failure
 * ------------------------
 *
 * On discovering that a stack or heap check has failed, we do the following:
 *
 *    - If the context_switch flag is set, indicating that there are more
 *      threads waiting to run, we yield to the scheduler
 *      (return ThreadYielding).
 *
 *    - If Hp > HpLim, we've had a heap check failure.  This means we've
 *      come to the end of the current heap block, so we try to chain
 *      another block on with ExtendNursery().
 *
 *           - If this succeeds, we carry on without returning to the
 *             scheduler.
 *
 *           - If it fails, we return to the scheduler claiming HeapOverflow
 *             so that a garbage collection can be performed.
 *
 *    - If Hp <= HpLim, it must have been a stack check that failed, in
 *      which case we return to the scheduler claiming StackOverflow; the
 *      scheduler will either increase the size of our stack, or raise
 *      an exception if the stack is already too big.
 *
 * The effect of checking for context switch only in the heap/stack check
 * failure code is that we'll switch threads after the current thread has
 * reached the end of its heap block.  If a thread isn't allocating
 * at all, it won't yield.  Hopefully this won't be a problem in practice.
 */
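
/* As an illustration (a made-up fragment, not code from this file): the
 * compiler emits heap checks of roughly the following shape at a closure
 * entry point.  The allocation size (24) and the label name are invented
 * here; the point is that the failure path records the attempted
 * allocation in HpAlloc and tail-calls one of the canned sequences below,
 * which then run the GC_GENERIC logic described above:
 *
 *        Hp = Hp + 24;
 *        if (Hp > HpLim) goto gc;
 *        ... allocate and continue ...
 *    gc:
 *        HpAlloc = 24;
 *        jump __stg_gc_enter_1;   // R1 still points to the closure
 */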

#define PRE_RETURN(why,what_next)                       \
  StgTSO_what_next(CurrentTSO) = what_next::I16;        \
  StgRegTable_rRet(BaseReg) = why;                      \
  R1 = BaseReg;

/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */

#define GC_GENERIC                                              \
    DEBUG_ONLY(foreign "C" heapCheckFail());                    \
    if (Hp > HpLim) {                                           \
        Hp = Hp - HpAlloc/*in bytes*/;                          \
        if (HpAlloc <= BLOCK_SIZE                               \
            && bdescr_link(CurrentNursery) != NULL) {           \
            CLOSE_NURSERY();                                    \
            CurrentNursery = bdescr_link(CurrentNursery);       \
            OPEN_NURSERY();                                     \
            if (CInt[context_switch] != 0 :: CInt) {            \
                R1 = ThreadYielding;                            \
                goto sched;                                     \
            } else {                                            \
                jump %ENTRY_CODE(Sp(0));                        \
            }                                                   \
        } else {                                                \
            R1 = HeapOverflow;                                  \
            goto sched;                                         \
        }                                                       \
    } else {                                                    \
        R1 = StackOverflow;                                     \
    }                                                           \
  sched:                                                        \
    PRE_RETURN(R1,ThreadRunGHC);                                \
    jump stg_returnToSched;
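
// Note: in the stack-overflow branch of GC_GENERIC, R1 is set to
// StackOverflow and control simply falls through to the sched: label, so
// all three outcomes (ThreadYielding, HeapOverflow and StackOverflow)
// leave via PRE_RETURN and stg_returnToSched.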

#define HP_GENERIC                              \
   PRE_RETURN(HeapOverflow, ThreadRunGHC)       \
  jump stg_returnToSched;

#define BLOCK_GENERIC                           \
   PRE_RETURN(ThreadBlocked,  ThreadRunGHC)     \
  jump stg_returnToSched;

#define YIELD_GENERIC                           \
  PRE_RETURN(ThreadYielding, ThreadRunGHC)      \
  jump stg_returnToSched;

#define BLOCK_BUT_FIRST(c)                      \
  PRE_RETURN(ThreadBlocked, ThreadRunGHC)       \
  R2 = c;                                       \
  jump stg_returnToSchedButFirst;

#define YIELD_TO_INTERPRETER                    \
  PRE_RETURN(ThreadYielding, ThreadInterpret)   \
  jump stg_returnToSchedNotPaused;

/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    ENTER();
}

__stg_gc_enter_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    GC_GENERIC
}
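
// The frame pushed by __stg_gc_enter_1 (stg_enter_info plus the closure
// from R1) means that when the scheduler resumes this thread, the
// stg_enter return code above pops the closure back into R1 and enters
// it, so the interrupted closure entry is simply retried.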

#if defined(GRAN)
/*
  ToDo: merge the block and yield macros, calling something like BLOCK(N)
        at the end;
*/

/*
   Should we actually ever do a yield in such a case?? -- HWL
*/
gran_yield_0
{
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

gran_yield_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 2 Regs--------------------------------------------------------------------*/

gran_yield_2
{
    Sp_adj(-2);
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 3 Regs -------------------------------------------------------------------*/

gran_yield_3
{
    Sp_adj(-3);
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 4 Regs -------------------------------------------------------------------*/

gran_yield_4
{
    Sp_adj(-4);
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 5 Regs -------------------------------------------------------------------*/

gran_yield_5
{
    Sp_adj(-5);
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 6 Regs -------------------------------------------------------------------*/

gran_yield_6
{
    Sp_adj(-6);
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 7 Regs -------------------------------------------------------------------*/

gran_yield_7
{
    Sp_adj(-7);
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 8 Regs -------------------------------------------------------------------*/

gran_yield_8
{
    Sp_adj(-8);
    Sp(7) = R8;
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadYielding;
    jump StgReturn;
}

// the same routines but with a block rather than a yield

gran_block_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 2 Regs--------------------------------------------------------------------*/

gran_block_2
{
    Sp_adj(-2);
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 3 Regs -------------------------------------------------------------------*/

gran_block_3
{
    Sp_adj(-3);
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 4 Regs -------------------------------------------------------------------*/

gran_block_4
{
    Sp_adj(-4);
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 5 Regs -------------------------------------------------------------------*/

gran_block_5
{
    Sp_adj(-5);
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 6 Regs -------------------------------------------------------------------*/

gran_block_6
{
    Sp_adj(-6);
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 7 Regs -------------------------------------------------------------------*/

gran_block_7
{
    Sp_adj(-7);
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 8 Regs -------------------------------------------------------------------*/

gran_block_8
{
    Sp_adj(-8);
    Sp(7) = R8;
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif

#if 0 && defined(PAR)

/*
  Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
  saving of the thread state from the actual jump via an StgReturn.
  We need this separation because we call RTS routines in blocking entry codes
  before jumping back into the RTS (see parallel/FetchMe.hc).
*/

par_block_1_no_jump
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();
}

par_jump
{
    TSO_what_next(CurrentTSO) = ThreadRunGHC;
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif

/* -----------------------------------------------------------------------------
   Heap checks in Primitive case alternatives

   A primitive case alternative is entered with a value either in
   R1, FloatReg1 or D1 depending on the return convention.  All the
   cases are covered below.
   -------------------------------------------------------------------------- */
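
/* As an illustration (a made-up fragment, not code from this file): a
 * case alternative scrutinising an Int# in R1 whose heap check fails
 * might be compiled to something like
 *
 *        HpAlloc = 16;
 *        jump stg_gc_unbx_r1;
 *
 * stg_gc_unbx_r1 below saves R1 on the stack as a non-pointer word and
 * runs the generic GC code; the matching stg_gc_unbx_r1_info return code
 * then reloads R1 before resuming the alternative.
 */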

/*-- No Registers live ------------------------------------------------------ */

stg_gc_noregs
{
    GC_GENERIC
}

/*-- void return ------------------------------------------------------------ */

INFO_TABLE_RET( stg_gc_void, RET_SMALL)
{
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

/*-- R1 is boxed/unpointed -------------------------------------------------- */

INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unpt_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unpt_r1_info;
    GC_GENERIC
}

/*-- R1 is unboxed -------------------------------------------------- */

/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unbx_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unbx_r1_info;
    GC_GENERIC
}

/*-- F1 contains a float ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
{
    F1 = F_[Sp+WDS(1)];
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_f1
{
    Sp_adj(-2);
    F_[Sp + WDS(1)] = F1;
    Sp(0) = stg_gc_f1_info;
    GC_GENERIC
}

/*-- D1 contains a double ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
{
    D1 = D_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgDouble;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_d1
{
    Sp = Sp - WDS(1) - SIZEOF_StgDouble;
    D_[Sp + WDS(1)] = D1;
    Sp(0) = stg_gc_d1_info;
    GC_GENERIC
}


/*-- L1 contains an int64 ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
{
    L1 = L_[Sp + WDS(1)];
    Sp_adj(1) + SIZEOF_StgWord64;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_l1
{
    Sp_adj(-1) - SIZEOF_StgWord64;
    L_[Sp + WDS(1)] = L1;
    Sp(0) = stg_gc_l1_info;
    GC_GENERIC
}

/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */

INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused )
{
    Sp_adj(1);
    // one ptr is on the stack (Sp(0))
    jump %ENTRY_CODE(Sp(1));
}

/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be
   a pointer to the function closure in R1 - if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.

   This code arranges the stack like this:

         |        ....         |
         |        args         |
         +---------------------+
         |      f_closure      |
         +---------------------+
         |        size         |
         +---------------------+
         |   stg_gc_fun_info   |
         +---------------------+

   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */

__stg_gc_fun
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
        size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else {
        if (type == ARG_GEN_BIG) {
#ifdef TABLES_NEXT_TO_CODE
            // bitmap field holds an offset
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
                                        + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
        } else {
            size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
        }
    }

#ifdef NO_ARG_REGS
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    GC_GENERIC
#else
    W_ type;
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    // cache the size
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        GC_GENERIC
    } else {
        jump W_[stg_stack_save_entries + WDS(type)];
            // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}

/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to __stg_gc_fun (above): this fragment returns to the
   function, passing arguments in the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_gc_fun, RET_FUN )
{
    R1 = Sp(2);
    Sp_adj(3);
#ifdef NO_ARG_REGS
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(UNTAG(R1));
#else
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        jump StgFunInfoExtra_slow_apply(info);
    } else {
        if (type == ARG_BCO) {
            // cover this case just to be on the safe side
            Sp_adj(-2);
            Sp(1) = R1;
            Sp(0) = stg_apply_interp_info;
            jump stg_yield_to_interpreter;
        } else {
            jump W_[stg_ap_stack_entries + WDS(type)];
        }
    }
#endif
}

/* -----------------------------------------------------------------------------
   Generic Heap Check Code.

   Called with Liveness mask in R9, Return address in R10.
   Stack must be consistent (containing all necessary info pointers
   to relevant SRTs).

   See StgMacros.h for a description of the RET_DYN stack frame.

   We also define an stg_gen_yield here, because it's very similar.
   -------------------------------------------------------------------------- */

// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
// on a 64-bit machine, we'll end up wasting a couple of words, but
// it's not a big deal.

#define RESTORE_EVERYTHING                      \
    L1   = L_[Sp + WDS(19)];                    \
    D2   = D_[Sp + WDS(17)];                    \
    D1   = D_[Sp + WDS(15)];                    \
    F4   = F_[Sp + WDS(14)];                    \
    F3   = F_[Sp + WDS(13)];                    \
    F2   = F_[Sp + WDS(12)];                    \
    F1   = F_[Sp + WDS(11)];                    \
    R8 = Sp(10);                                \
    R7 = Sp(9);                                 \
    R6 = Sp(8);                                 \
    R5 = Sp(7);                                 \
    R4 = Sp(6);                                 \
    R3 = Sp(5);                                 \
    R2 = Sp(4);                                 \
    R1 = Sp(3);                                 \
    Sp_adj(21);

#define RET_OFFSET (-19)
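
// Why -19: SAVE_EVERYTHING (below) stores the return address at Sp(2) of
// a 21-word frame, and RESTORE_EVERYTHING finishes with Sp_adj(21), so
// relative to the adjusted Sp the saved return address sits at offset
// 2 - 21 = -19.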

#define SAVE_EVERYTHING                         \
    Sp_adj(-21);                                \
    L_[Sp + WDS(19)] = L1;                      \
    D_[Sp + WDS(17)] = D2;                      \
    D_[Sp + WDS(15)] = D1;                      \
    F_[Sp + WDS(14)] = F4;                      \
    F_[Sp + WDS(13)] = F3;                      \
    F_[Sp + WDS(12)] = F2;                      \
    F_[Sp + WDS(11)] = F1;                      \
    Sp(10) = R8;                                \
    Sp(9) = R7;                                 \
    Sp(8) = R6;                                 \
    Sp(7) = R5;                                 \
    Sp(6) = R4;                                 \
    Sp(5) = R3;                                 \
    Sp(4) = R2;                                 \
    Sp(3) = R1;                                 \
    Sp(2) = R10;    /* return address */        \
    Sp(1) = R9;     /* liveness mask  */        \
    Sp(0) = stg_gc_gen_info;

INFO_TABLE_RET( stg_gc_gen, RET_DYN )
/* bitmap in the above info table is unused, the real one is on the stack. */
{
    RESTORE_EVERYTHING;
    jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
}

stg_gc_gen
{
    SAVE_EVERYTHING;
    GC_GENERIC
}

// A heap check at an unboxed tuple return point.  The return address
// is on the stack, and we can find it by using the offsets given
// to us in the liveness mask.
stg_gc_ut
{
    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
    SAVE_EVERYTHING;
    GC_GENERIC
}

/*
 * stg_gc_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
 * because we've just failed doYouWantToGC(), not a standard heap
 * check.  GC_GENERIC would end up returning StackOverflow.
 */
stg_gc_gen_hp
{
    SAVE_EVERYTHING;
    HP_GENERIC
}

/* -----------------------------------------------------------------------------
   Yields
   -------------------------------------------------------------------------- */

stg_gen_yield
{
    SAVE_EVERYTHING;
    YIELD_GENERIC
}

stg_yield_noregs
{
    YIELD_GENERIC;
}

/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}

/* -----------------------------------------------------------------------------
   Blocks
   -------------------------------------------------------------------------- */

stg_gen_block
{
    SAVE_EVERYTHING;
    BLOCK_GENERIC;
}

stg_block_noregs
{
    BLOCK_GENERIC;
}

stg_block_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}

/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar:
 *
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info
 *
 * Stack layout for a thread blocked in putMVar:
 *
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.cmm for a description of the workings of take/putMVar.
 *
 * -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump takeMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_takemvar
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}
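
// Note: in the threaded RTS the MVar is still locked at this point; R3 is
// set to the MVar so that stg_block_takemvar_finally (run via
// BLOCK_BUT_FIRST, after the thread state has been saved) can unlock it
// with the MVAR_DIRTY info pointer.  stg_block_putmvar below follows the
// same pattern.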

INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump putMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_putmvar
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_putmvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
}

// code fragment executed just before we return to the scheduler
stg_block_blackhole_finally
{
#if defined(THREADED_RTS)
    // The last thing we do is release sched_mutex, which is
    // preventing other threads from accessing blackhole_queue and
    // picking up this thread before we are finished with it.
    RELEASE_LOCK(sched_mutex "ptr");
#endif
    jump StgReturn;
}

stg_block_blackhole
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_BUT_FIRST(stg_block_blackhole_finally);
}

INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump killThreadzh_fast;
}

stg_block_throwto_finally
{
#ifdef THREADED_RTS
    foreign "C" throwToReleaseTarget (R3 "ptr");
#endif
    jump StgReturn;
}

stg_block_throwto
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_throwto_info;
    BLOCK_BUT_FIRST(stg_block_throwto_finally);
}

#ifdef mingw32_HOST_OS
INFO_TABLE_RET( stg_block_async, RET_SMALL )
{
    W_ ares;
    W_ len, errC;

    ares = StgTSO_block_info(CurrentTSO);
    len = StgAsyncIOResult_len(ares);
    errC = StgAsyncIOResult_errCode(ares);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    R1 = len;
    Sp(0) = errC;
    jump %ENTRY_CODE(Sp(1));
}

stg_block_async
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}

/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
{
    W_ ares;

    ares = StgTSO_block_info(CurrentTSO);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

stg_block_async_void
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif

/* -----------------------------------------------------------------------------
   STM-specific waiting
   -------------------------------------------------------------------------- */

stg_block_stmwait_finally
{
    foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
    jump StgReturn;
}

stg_block_stmwait
{
    BLOCK_BUT_FIRST(stg_block_stmwait_finally);
}