f1053ebc129d88e10eb4e7e9977ea633cc295835
[ghc-hetmet.git] / ghc / includes / COptJumps.lh
1 \section[COptJumps]{Macros for tail-jumping}
2
3 % this file is part of the C-as-assembler document
4
5 \begin{code}
6 #ifndef COPTJUMPS_H    /* include guard; closed at the end of the file */
7 #define COPTJUMPS_H
8 \end{code}
9
10 %************************************************************************
11 %*                                                                      *
12 \subsection[COptJumps-portable]{Tail-(non-)jumping in ``portable~C''}
13 %*                                                                      *
14 %************************************************************************
15
16 \begin{code}
17 #if ! (defined(__STG_TAILJUMPS__) && defined(__GNUC__))
18
/* Portable C: a "tail jump" is performed by returning the target's
   address (cast to the code-pointer type F_, declared elsewhere) to
   the plain-C mini-interpreter loop, which then calls it.  RESUME_
   is identical in this scheme. */
19 #define JMP_(target)    return((F_) (target))
20 #define RESUME_(target) JMP_(target)
21 \end{code}
22
23 Don't need to do anything magical for the mini-interpreter, because
24 we're really going to use the plain old C one (and the debugging
25 variant, too, for that matter).
26
27 %************************************************************************
28 %*                                                                      *
29 \subsection[COptJumps-optimised]{Tail-jumping in ``optimised~C''}
30 %*                                                                      *
31 %************************************************************************
32
33 \begin{code}
34 #else /* __STG_TAILJUMPS__ && __GNUC__ */
35 \end{code}
36
37 GCC will have assumed that pushing/popping of C-stack frames is going
38 on when it generated its code, and used stack space accordingly.
39 However, we actually {\em post-process away} all such stack-framery
40 (see \tr{ghc/driver/ghc-asm-*.lprl}).
41 Things will be OK, however, if we initially make sure there are
42 @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
43 variables.
44
45 \begin{code}
/* Scratch area kept on the C stack for GCC-allocated locals: the asm
   mangler strips the usual frame pushes/pops, so this space must be
   reserved up front by each platform's MINI_INTERPRETER_SETUP. */
46 #define RESERVED_C_STACK_BYTES (512 * sizeof(I_))  /* MUST BE OF GENEROUS ALIGNMENT */
47 \end{code}
48
49 The platform-specific details are given in alphabetical order.
50
51 %************************************************************************
52 %*                                                                      *
53 \subsubsection[COptJumps-alpha]{Tail-jumping on Alphas}
54 %*                                                                      *
55 %************************************************************************
56
57 We have to set the procedure value register (\$27) before branching, so
58 that the target function can load the gp (\$29) as appropriate.
59
60 It seems that \tr{_procedure} can't be declared within the body of the
61 \tr{JMP_} macro...at least, not if we want it to be \$27, which we do!
62
63 \begin{code}
64 #if alpha_dec_osf1_TARGET
65     /* ToDo: less specific? */
66
67 /*
68    Jumping to a new block of code, we need to set up $27 to point
69    at the target, so that the callee can establish its gp (as an
70    offset from its own starting address).  For some reason, gcc
71    refuses to give us $27 for _procedure if it's declared as a 
72    local variable, so the workaround is to make it a global.
73
74    Note:  The local variable works in gcc 2.6.2, but fails in 2.5.8.
75  */
76
77 /* MOVED: to COptRegs.lh -- very unsatisfactorily.
78    Otherwise, we can get a "global register variable follows a
79    function definition" error.
80
81    Once we can take gcc 2.6.x as std, then we can use
82    the local variant, and the problem goes away.  (WDP 95/02)
83
84 register void *_procedure __asm__("$27");
85 */
86
/* Tail-jump: load the continuation into _procedure (the global
   register variable pinned to $27 -- declared in COptRegs.lh, as the
   comment above explains) and take gcc's computed goto; the callee
   uses $27 to establish its gp. */
87 #define JMP_(cont)                              \
88     do { _procedure = (void *)(cont);           \
89          goto *_procedure;                      \
90        } while(0)
91
92 /* 
93    When we resume at the point where a call was originally made,
94    we need to restore $26, so that gp can be reloaded appropriately.
95    However, sometimes we ``resume'' by entering a new function 
96    (typically EnterNodeCode), so we need to set up $27 as well.
97  */
98
99 #define RESUME_(cont)                           \
100     do { _procedure = (void *)(cont);           \
101          __asm__ volatile("mov $27,$26");       \
102          goto *_procedure;                      \
103        } while(0);
104
/* Entering the threaded world: save the Alpha callee-saved integer
   registers ($9-$15) and floating registers ($f2-$f9) below $sp
   ($30), then drop $sp past the reserved local area plus the
   8-long + 8-double save slots named in the "K" operand. */
105 #define MINI_INTERPRETER_SETUP                  \
106     __asm__ volatile ("stq $9,-8($30)\n"        \
107                       "stq $10,-16($30)\n"      \
108                       "stq $11,-24($30)\n"      \
109                       "stq $12,-32($30)\n"      \
110                       "stq $13,-40($30)\n"      \
111                       "stq $14,-48($30)\n"      \
112                       "stq $15,-56($30)\n"      \
113                       "stt $f2,-64($30)\n"      \
114                       "stt $f3,-72($30)\n"      \
115                       "stt $f4,-80($30)\n"      \
116                       "stt $f5,-88($30)\n"      \
117                       "stt $f6,-96($30)\n"      \
118                       "stt $f7,-104($30)\n"     \
119                       "stt $f8,-112($30)\n"     \
120                       "stt $f9,-120($30)\n"     \
121                       "lda $30,-%0($30)" : :    \
122                       "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
123
/* Leaving the threaded world: miniInterpretEnd is the global label
   jumped to on exit; it pops the reserved area (mirroring the lda
   above) and restores exactly the registers saved by
   MINI_INTERPRETER_SETUP. */
124 #define MINI_INTERPRETER_END                    \
125     __asm__ volatile (".align 3\n"              \
126                       ".globl miniInterpretEnd\n" \
127                       "miniInterpretEnd:\n"     \
128                       "lda $30,%0($30)\n"       \
129                       "ldq $9,-8($30)\n"        \
130                       "ldq $10,-16($30)\n"      \
131                       "ldq $11,-24($30)\n"      \
132                       "ldq $12,-32($30)\n"      \
133                       "ldq $13,-40($30)\n"      \
134                       "ldq $14,-48($30)\n"      \
135                       "ldq $15,-56($30)\n"      \
136                       "ldt $f2,-64($30)\n"      \
137                       "ldt $f3,-72($30)\n"      \
138                       "ldt $f4,-80($30)\n"      \
139                       "ldt $f5,-88($30)\n"      \
140                       "ldt $f6,-96($30)\n"      \
141                       "ldt $f7,-104($30)\n"     \
142                       "ldt $f8,-112($30)\n"     \
143                       "ldt $f9,-120($30)" : :   \
144                       "K" (RESERVED_C_STACK_BYTES+8*sizeof(double)+8*sizeof(long)));
145
146 #endif /* __alpha */
147 \end{code}
148
149 %************************************************************************
150 %*                                                                      *
151 \subsubsection[COptJumps-Hpux]{Tail-jumping on a HP-PA machine running HP-UX}
152 %*                                                                      *
153 %************************************************************************
154
155 \begin{code}
156 #if hppa1_1_hp_hpux_TARGET
157
158 /* do FUNBEGIN/END the easy way */
159 #define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
160 #define FUNEND      __asm__ volatile ("--- END ---");
161
162 /* The stack grows up!  Local variables are allocated just above the
163    frame pointer, and extra arguments are stashed just below the stack
164    pointer, so the safe space is again in the middle (cf. sparc).
165  */
166
167 #define JMP_(cont)                              \
168     do { void *_procedure = (void *)(cont);     \
169          goto *_procedure;                      \
170        } while(0)
171
172 #define RESUME_(cont)   JMP_(cont)
173
/* Save the HP-PA callee-saved general registers (r3-r18) and float
   registers (fr12-fr21) into the reserved area, addressed off %r3
   via scratch register %r19 (declared clobbered).
   NOTE(review): the "n" operand subtracts (116 * sizeof(long) + 10 *
   sizeof(double)), while the `space' declaration reserves only
   16*sizeof(long) + 10*sizeof(double); 116 may be a typo for 16 --
   confirm against the intended HP-UX frame layout before relying on
   this offset. */
174 #define MINI_INTERPRETER_SETUP                  \
175     StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];    \
176     __asm__ volatile ("ldo %0(%%r3),%%r19\n"            \
177                       "\tstw %%r3, 0(0,%%r19)\n"        \
178                       "\tstw %%r4, 4(0,%%r19)\n"        \
179                       "\tstw %%r5, 8(0,%%r19)\n"        \
180                       "\tstw %%r6,12(0,%%r19)\n"        \
181                       "\tstw %%r7,16(0,%%r19)\n"        \
182                       "\tstw %%r8,20(0,%%r19)\n"        \
183                       "\tstw %%r9,24(0,%%r19)\n"        \
184                       "\tstw %%r10,28(0,%%r19)\n"       \
185                       "\tstw %%r11,32(0,%%r19)\n"       \
186                       "\tstw %%r12,36(0,%%r19)\n"       \
187                       "\tstw %%r13,40(0,%%r19)\n"       \
188                       "\tstw %%r14,44(0,%%r19)\n"       \
189                       "\tstw %%r15,48(0,%%r19)\n"       \
190                       "\tstw %%r16,52(0,%%r19)\n"       \
191                       "\tstw %%r17,56(0,%%r19)\n"       \
192                       "\tstw %%r18,60(0,%%r19)\n"       \
193                       "\tldo 80(%%r19),%%r19\n"         \
194                       "\tfstds %%fr12,-16(0,%%r19)\n"   \
195                       "\tfstds %%fr13, -8(0,%%r19)\n"   \
196                       "\tfstds %%fr14,  0(0,%%r19)\n"   \
197                       "\tfstds %%fr15,  8(0,%%r19)\n"   \
198                       "\tldo 32(%%r19),%%r19\n"         \
199                       "\tfstds %%fr16,-16(0,%%r19)\n"   \
200                       "\tfstds %%fr17, -8(0,%%r19)\n"   \
201                       "\tfstds %%fr18,  0(0,%%r19)\n"   \
202                       "\tfstds %%fr19,  8(0,%%r19)\n"   \
203                       "\tldo 32(%%r19),%%r19\n"         \
204                       "\tfstds %%fr20,-16(0,%%r19)\n"   \
205                       "\tfstds %%fr21, -8(0,%%r19)\n" : :   \
206                       "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19" );
207
/* miniInterpretEnd: exported entry label (HP-UX .EXPORT directives)
   jumped to on exit from the threaded world; reloads every register
   stored by MINI_INTERPRETER_SETUP, walking %r19 the same way. */
208 #define MINI_INTERPRETER_END                    \
209     __asm__ volatile (".align 4\n"              \
210                       "\t.EXPORT miniInterpretEnd,CODE\n" \
211                       "\t.EXPORT miniInterpretEnd,ENTRY,PRIV_LEV=3\n" \
212                       "miniInterpretEnd\n"              \
213                       "\tldo %0(%%r3),%%r19\n"          \
214                       "\tldw  0(0,%%r19),%%r3\n"        \
215                       "\tldw  4(0,%%r19),%%r4\n"        \
216                       "\tldw  8(0,%%r19),%%r5\n"        \
217                       "\tldw 12(0,%%r19),%%r6\n"        \
218                       "\tldw 16(0,%%r19),%%r7\n"        \
219                       "\tldw 20(0,%%r19),%%r8\n"        \
220                       "\tldw 24(0,%%r19),%%r9\n"        \
221                       "\tldw 28(0,%%r19),%%r10\n"       \
222                       "\tldw 32(0,%%r19),%%r11\n"       \
223                       "\tldw 36(0,%%r19),%%r12\n"       \
224                       "\tldw 40(0,%%r19),%%r13\n"       \
225                       "\tldw 44(0,%%r19),%%r14\n"       \
226                       "\tldw 48(0,%%r19),%%r15\n"       \
227                       "\tldw 52(0,%%r19),%%r16\n"       \
228                       "\tldw 56(0,%%r19),%%r17\n"       \
229                       "\tldw 60(0,%%r19),%%r18\n"       \
230                       "\tldo 80(%%r19),%%r19\n"         \
231                       "\tfldds -16(0,%%r19),%%fr12\n"   \
232                       "\tfldds  -8(0,%%r19),%%fr13\n"   \
233                       "\tfldds   0(0,%%r19),%%fr14\n"   \
234                       "\tfldds   8(0,%%r19),%%fr15\n"   \
235                       "\tldo 32(%%r19),%%r19\n"         \
236                       "\tfldds -16(0,%%r19),%%fr16\n"   \
237                       "\tfldds  -8(0,%%r19),%%fr17\n"   \
238                       "\tfldds   0(0,%%r19),%%fr18\n"   \
239                       "\tfldds   8(0,%%r19),%%fr19\n"   \
240                       "\tldo 32(%%r19),%%r19\n"         \
241                       "\tfldds -16(0,%%r19),%%fr20\n"   \
242                       "\tfldds  -8(0,%%r19),%%fr21\n" : :   \
243                       "n" (RESERVED_C_STACK_BYTES - (116 * sizeof(long) + 10 * sizeof(double))) : "%r19");
244
245 #endif /* hppa1.1-hp-hpux* */
246 \end{code}
247
248 %************************************************************************
249 %*                                                                      *
250 \subsubsection[COptJumps-iX86]{Tail-jumping on a 386/486}
251 %*                                                                      *
252 %************************************************************************
253
254 \begin{code}
255 #if i386_TARGET_ARCH || i486_TARGET_ARCH
256
257 /* do FUNBEGIN/END the easy way */
258 #define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
259 #define FUNEND      __asm__ volatile ("--- END ---");
260
261 /* try "m68k-style" for now */
262 extern void __DISCARD__(STG_NO_ARGS);
263
264 #define JMP_(cont)                      \
265     do { void *target;                  \
266          __DISCARD__();                 \
267          target = (void *)(cont);       \
268          goto *target;                  \
269        } while(0)
270
271 #define RESUME_(target) JMP_(target)
272
273 /* The safe part of the stack frame is near the top */
274
275 extern P_ SP_stack[];
276 extern I_ SP_stack_ptr;
277
278 #define MINI_INTERPRETER_SETUP                                  \
279     StgChar space[RESERVED_C_STACK_BYTES+4*sizeof(long)];       \
280     __asm__ volatile ("leal %c0(%%esp),%%eax\n"                 \
281                       "\tmovl %%ebx,0(%%eax)\n"                 \
282                       "\tmovl %%esi,4(%%eax)\n"                 \
283                       "\tmovl %%edi,8(%%eax)\n"                 \
284                       "\tmovl %%ebp,12(%%eax)\n"                \
285                       "\tmovl %%esp,_MainRegTable+100"          \
286                         : : "n" (RESERVED_C_STACK_BYTES)        \
287                         : "%eax");                              \
288     __asm__ volatile ("movl %%esp,%0"                           \
289                         : "=r" (SP_stack[++SP_stack_ptr]));
290
291 #define MINI_INTERPRETER_END                            \
292     __asm__ volatile (".align 4\n"                      \
293                       ".globl _miniInterpretEnd\n"      \
294                       "_miniInterpretEnd:\n"            \
295                       "\tnop"                           \
296                         : : : "memory" );               \
297     __asm__ volatile ("movl %0,%%esp\n"                 \
298                       "\tmovl %%esp,_MainRegTable+100"  \
299                         : : "m" (SP_stack[SP_stack_ptr--]) ); \
300     __asm__ volatile ("leal %c0(%%esp),%%eax\n"         \
301                       "\tmovl 0(%%eax),%%ebx\n"         \
302                       "\tmovl 4(%%eax),%%esi\n"         \
303                       "\tmovl 8(%%eax),%%edi\n"         \
304                       "\tmovl 12(%%eax),%%ebp"          \
305                         : : "n" (RESERVED_C_STACK_BYTES) : "%eax");
306
307 #endif /* __i[34]86__ */
308 \end{code}
309
310 %************************************************************************
311 %*                                                                      *
312 \subsubsection[COptJumps-m68k]{Tail-jumping on m68k boxes}
313 %*                                                                      *
314 %************************************************************************
315
316 For 680x0s, we use a quite-magic @JMP_@ macro, which includes
317 beginning- and end-of-function markers.
318
319 \begin{code}
320 #if m68k_TARGET_ARCH
321
/* Marker pseudo-instructions delimiting function bodies for the
   assembly post-processor (see ghc/driver/ghc-asm-*.lprl). */
322 #define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
323 #define FUNEND      __asm__ volatile ("--- END ---");
324 \end{code}
325
326 The call to \tr{__DISCARD__} in @JMP_@ is fodder for GCC, to force it
327 to pop arguments to previous function calls before the end of the
328 current function.  This is unnecessary if we can manage to compile
329 with \tr{-fomit-frame-pointer} as well as \tr{-fno-defer-pop}.  (WDP
330 95/02: Either false or dodgy.) At the moment, the asm mangler removes
331 these calls to \tr{__DISCARD__}.
332
333
334 \begin{code}
335 extern void __DISCARD__(STG_NO_ARGS);
336
/* Tail-jump via gcc's computed goto; __DISCARD__() forces pending
   call arguments to be popped first (see the notes above -- the asm
   mangler removes the call itself). */
337 #define JMP_(cont)                      \
338     do { void *target;                  \
339          __DISCARD__();                 \
340          target = (void *)(cont);       \
341          goto *target;                  \
342     } while(0)
343
344 #define RESUME_(target) JMP_(target)
345
/* Save the callee-saved address/data registers (a2-a6/d2-d7) at the
   top of the reserved area and point a6 at that save area. */
346 #define MINI_INTERPRETER_SETUP                                  \
347     StgChar space[RESERVED_C_STACK_BYTES+11*sizeof(long)];      \
348     __asm__ volatile ("moveml a2-a6/d2-d7,sp@(%c0)\n"           \
349                       "\tlea sp@(%c0),a6" : : "J" (RESERVED_C_STACK_BYTES));
350
/* _miniInterpretEnd: exit label from the threaded world; restores
   the registers saved above.  NOTE(review): the addqw #4,sp
   presumably discards a pushed return address -- confirm against the
   mini-interpreter's entry sequence. */
351 #define MINI_INTERPRETER_END                            \
352     __asm__ volatile (".even\n"                         \
353                       ".globl _miniInterpretEnd\n"      \
354                       "_miniInterpretEnd:\n"            \
355                       "\taddqw #4,sp\n"                 \
356                       "\tmoveml sp@(%c0),a2-a6/d2-d7" : : "J" (RESERVED_C_STACK_BYTES));
357
358 #endif /* __m68k__ */
359 \end{code}
360
361 %************************************************************************
362 %*                                                                      *
363 \subsubsection[COptJumps-mips]{Tail-jumping on a MIPS box}
364 %*                                                                      *
365 %************************************************************************
366
367 \begin{code}
368 #if mipseb_TARGET_ARCH || mipsel_TARGET_ARCH
369
370 /* do FUNBEGIN/END the easy way */
371 #define FUNBEGIN    __asm__ volatile ("--- BEGIN ---");
372 #define FUNEND      __asm__ volatile ("--- END ---");
373
374 /* try "m68k-style" for now */
375 extern void __DISCARD__(STG_NO_ARGS);
376
377 /* this is "alpha-style" */
/* NOTE(review): _procedure is not declared in this section; as on
   alpha it is presumably the global register variable from
   COptRegs.lh -- confirm for the MIPS port. */
378 #define JMP_(cont)                              \
379     do { __DISCARD__();                         \
380          _procedure = (void *)(cont);           \
381          goto *_procedure;                      \
382        } while(0)
383
384 #define RESUME_(target) JMP_(target)
385
386 /* _All_ callee-saved regs, whether we steal them or not, must be saved
387    (and restored).
388 */
389
/* Save callee-saved FP regs ($f20-$f30, even-numbered) and integer
   regs ($16-$23, $fp) into the reserved area above $sp, using $2 as
   a clobbered scratch pointer. */
390 #define MINI_INTERPRETER_SETUP                  \
391     StgChar space[RESERVED_C_STACK_BYTES+6*sizeof(double)+9*sizeof(long)]; \
392     __asm__ volatile ("addu $2,$sp,%0\n"        \
393                       "\ts.d $f20,0($2)\n"      \
394                       "\ts.d $f22,8($2)\n"      \
395                       "\ts.d $f24,16($2)\n"     \
396                       "\ts.d $f26,24($2)\n"     \
397                       "\ts.d $f28,32($2)\n"     \
398                       "\ts.d $f30,40($2)\n"     \
399                       "\tsw  $16,48($2)\n"      \
400                       "\tsw  $17,52($2)\n"      \
401                       "\tsw  $18,56($2)\n"      \
402                       "\tsw  $19,60($2)\n"      \
403                       "\tsw  $20,64($2)\n"      \
404                       "\tsw  $21,68($2)\n"      \
405                       "\tsw  $22,72($2)\n"      \
406                       "\tsw  $23,76($2)\n"      \
407                       "\tsw  $fp,80($2)\n"      \
408                       : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
409
410     /* the 16 bytes are for the argument-register save-area above $sp */
411
/* miniInterpretEnd: exit label from the threaded world; reloads
   every register stored by MINI_INTERPRETER_SETUP at the same
   offsets. */
412 #define MINI_INTERPRETER_END                    \
413     __asm__ volatile (".align 2\n"              \
414                       ".globl miniInterpretEnd\n" \
415                       "miniInterpretEnd:\n"     \
416                       "\taddu $2,$sp,%0\n"      \
417                       "\tl.d $f20,0($2)\n"      \
418                       "\tl.d $f22,8($2)\n"      \
419                       "\tl.d $f24,16($2)\n"     \
420                       "\tl.d $f26,24($2)\n"     \
421                       "\tl.d $f28,32($2)\n"     \
422                       "\tl.d $f30,40($2)\n"     \
423                       "\tlw  $16,48($2)\n"      \
424                       "\tlw  $17,52($2)\n"      \
425                       "\tlw  $18,56($2)\n"      \
426                       "\tlw  $19,60($2)\n"      \
427                       "\tlw  $20,64($2)\n"      \
428                       "\tlw  $21,68($2)\n"      \
429                       "\tlw  $22,72($2)\n"      \
430                       "\tlw  $23,76($2)\n"      \
431                       "\tlw  $fp,80($2)\n"      \
432                       : : "I" (RESERVED_C_STACK_BYTES+16) : "$2" );
433
434 #endif /* mips */
435 \end{code}
436
437 %************************************************************************
438 %*                                                                      *
439 \subsubsection[COptJumps-RS6000]{Tail-jumping on an IBM RS6000 running AIX}
440 %*                                                                      *
441 %************************************************************************
442
443 \begin{code}
444 #if rs6000_ibm_aix_TARGET
445
/* Tail-jump by casting the continuation to the code-pointer type F_
   and calling it.  NOTE(review): this section defines no RESUME_,
   FUNBEGIN/FUNEND, or MINI_INTERPRETER_* -- the port appears
   incomplete (and is explicitly marked untested below); confirm
   before enabling. */
446 #define JMP_(cont)      ((F_) (cont))()
447 /* partain: untested */
448
449 #endif /* rs6000-ibm-aix* */
450 \end{code}
451
452 %************************************************************************
453 %*                                                                      *
454 \subsubsection[COptJumps-sparc]{Tail-jumping on Sun4s}
455 %*                                                                      *
456 %************************************************************************
457
458 We want tailjumps to be calls, because `call xxx' is the only Sparc branch
459 that allows an arbitrary label as a target.  (Gcc's ``goto *target'' construct
460 ends up loading the label into a register and then jumping, at the cost of
461 two extra instructions for the 32-bit load.)
462
463 When entering the threaded world, we stash our return address in a known
464 location so that \tr{%i7} is available as an extra callee-saves register.
465 Of course, we have to restore this when coming out of the threaded world.
466
467 I hate this god-forsaken architecture.  Since the top of the reserved
468 stack space is used for globals and the bottom is reserved for outgoing arguments,
469 we have to stick our return address somewhere in the middle.  Currently, I'm
470 allowing 100 extra outgoing arguments beyond the first 6.  --JSM
471
472 \begin{code}
473 #if sparc_TARGET_ARCH
474
/* Symbol spelling differs by OS: Solaris has no leading underscore
   on C-visible symbols, other sparc targets (SunOS a.out) do. */
475 #ifdef solaris2_TARGET_OS
476 #define MINI_INTERPRET_END   "miniInterpretEnd"
477 #else
478 #define MINI_INTERPRET_END   "_miniInterpretEnd"
479 #endif
480
481 #define JMP_(cont)      ((F_) (cont))()
482         /* Oh so happily, the above turns into a "call" instruction,
483            which, on a SPARC, is nothing but a "jmpl" with the
484            return address in %o7 [which we don't care about].
485         */
486 #define RESUME_(target) JMP_(target)
487
/* Stash the caller's return address (%i7) at word 100 of the
   reserved area -- above the outgoing-argument space at the bottom
   (room for 100 extra args, per the note above) -- freeing %i7 as an
   extra callee-saved register inside the threaded world. */
488 #define MINI_INTERPRETER_SETUP                  \
489     StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)];       \
490     register void *i7 __asm__("%i7");           \
491     ((void **)(space))[100] = i7;
492
/* miniInterpretEnd: exit label; reload %i7 from the stash slot so
   the enclosing C function can return normally. */
493 #define MINI_INTERPRETER_END                    \
494     __asm__ volatile (".align 4\n"              \
495             ".global " MINI_INTERPRET_END "\n"  \
496             MINI_INTERPRET_END ":\n"            \
497             "\tld %1,%0" : "=r" (i7) : "m" (((void **)(space))[100]));
498
499 #endif /* __sparc__ */
500 \end{code}
501
502 %************************************************************************
503 %*                                                                      *
504 \subsubsection[COptJumps-OOPS]{Someone screwed up here, too...}
505 %*                                                                      *
506 %************************************************************************
507
508 If one of the above machine-dependent sections wasn't triggered,
509 @JMP_@ won't be defined and you'll get link errors (if not
510 C-compiler errors).
511
512 \begin{code}
/* Deliberate C syntax error if no platform section above defined
   JMP_, so misconfiguration fails at compile time rather than link
   time. */
513 #if !defined(JMP_)
514 *???????* No JMP_ macro???
515 #endif
516
517 #endif /* __STG_TAILJUMPS__ */
518 \end{code}
519
520 If @FUNBEGIN@ and @FUNEND@ weren't defined, give them the default
521 (nothing).  Also, define @FB_@ and @FE_@ (short forms).
522 \begin{code}
/* Default FUNBEGIN/FUNEND to nothing on platforms that did not
   define their own markers above. */
523 #if ! defined(FUNBEGIN)
524 #define FUNBEGIN /* nothing */
525 #endif
526 #if ! defined(FUNEND)
527 #define FUNEND   /* nothing */
528 #endif
529
530 #define FB_     FUNBEGIN        /* short forms */
531 #define FE_     FUNEND
532
533 #endif /* ! that's all of... COPTJUMPS_H */
534 \end{code}